This is an automated email from the ASF dual-hosted git repository.

aminghadersohi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git


The following commit(s) were added to refs/heads/master by this push:
     new 85935b0b882 fix(mcp): handle SSL connection drop during pre-call 
session teardown (#39917)
85935b0b882 is described below

commit 85935b0b882225eec09e248a2d19a4dcdf5737c4
Author: Amin Ghadersohi <[email protected]>
AuthorDate: Tue May 12 02:32:14 2026 -0400

    fix(mcp): handle SSL connection drop during pre-call session teardown 
(#39917)
---
 superset/mcp_service/auth.py                       | 45 ++++++++++++++++++++--
 .../mcp_service/test_auth_user_resolution.py       | 44 +++++++++++++++++++++
 2 files changed, 86 insertions(+), 3 deletions(-)

diff --git a/superset/mcp_service/auth.py b/superset/mcp_service/auth.py
index 9bfb1e69fd0..583b721b9ea 100644
--- a/superset/mcp_service/auth.py
+++ b/superset/mcp_service/auth.py
@@ -516,6 +516,47 @@ def _cleanup_session_on_error() -> None:
         logger.warning("Error cleaning up session after exception: %s", e)
 
 
+def _remove_session_safe() -> None:
+    """Remove the scoped SQLAlchemy session, tolerating SSL/connection errors.
+
+    Thread-pool workers reuse threads across requests.  Before each tool call
+    the session is removed to prevent a prior request's thread-local session
+    from leaking into the next one.  If the underlying DBAPI connection died
+    between requests (e.g. RDS SSL idle-timeout or max-connection-age), the
+    rollback implicit in ``session.close()`` raises a ``DBAPIError`` subclass
+    (``OperationalError`` for psycopg2, ``InterfaceError`` for some other
+    drivers).
+
+    When that happens:
+    1. Invalidate the dead connection so the pool discards it (rather than
+       returning a broken connection to the next caller).
+    2. Retry ``remove()`` to deregister the session from the scoped registry.
+
+    The tool call still proceeds because a fresh connection will be obtained
+    on the next DB access.
+    """
+    from sqlalchemy.exc import DBAPIError
+
+    from superset.extensions import db
+
+    try:
+        db.session.remove()
+    except DBAPIError as exc:
+        logger.warning(
+            "Connection error during pre-call session cleanup "
+            "(likely SSL/idle timeout); invalidating connection and retrying: 
%s",
+            exc,
+        )
+        try:
+            db.session.invalidate()
+        except Exception as invalidate_exc:
+            logger.debug(
+                "Could not invalidate session after connection error: %s",
+                invalidate_exc,
+            )
+        db.session.remove()  # retry: session deregisters cleanly after 
invalidation
+
+
 def mcp_auth_hook(tool_func: F) -> F:  # noqa: C901
     """
     Authentication and authorization decorator for MCP tools.
@@ -638,9 +679,7 @@ def mcp_auth_hook(tool_func: F) -> F:  # noqa: C901
                 # still be bound to a different tenant's DB engine. Removing 
it here
                 # ensures the next DB access creates a fresh session bound to 
the
                 # correct engine for the current request.
-                from superset.extensions import db
-
-                db.session.remove()
+                _remove_session_safe()
                 user = _setup_user_context()
 
                 # No Flask context - this is a FastMCP internal operation
diff --git a/tests/unit_tests/mcp_service/test_auth_user_resolution.py 
b/tests/unit_tests/mcp_service/test_auth_user_resolution.py
index 3d6850954d5..34669e51d1e 100644
--- a/tests/unit_tests/mcp_service/test_auth_user_resolution.py
+++ b/tests/unit_tests/mcp_service/test_auth_user_resolution.py
@@ -409,6 +409,50 @@ def 
test_mcp_auth_hook_removes_stale_db_session_in_sync_wrapper(app) -> None:
     assert result == "fresh"
 
 
+def test_sync_wrapper_handles_ssl_error_on_pre_call_remove(app) -> None:
+    """sync_wrapper tolerates OperationalError from db.session.remove() before 
the call.
+
+    If the underlying DBAPI connection died between requests (e.g. RDS SSL
+    idle-timeout), the rollback implicit in session.close() raises
+    OperationalError.  _remove_session_safe() should:
+    - Log a warning
+    - Call session.invalidate() to mark the dead connection for pool discard
+    - Retry session.remove() so the registry is clean
+    - Allow the tool to run successfully
+    """
+    from sqlalchemy.exc import OperationalError as SAOperationalError
+
+    fresh_user = _make_mock_user("fresh")
+
+    def dummy_tool() -> str:
+        """Dummy sync tool."""
+        return g.user.username
+
+    wrapped = mcp_auth_hook(dummy_tool)
+
+    with app.test_request_context():
+        g.user = fresh_user
+        with patch("superset.extensions.db") as mock_db:
+            mock_db.session.remove.side_effect = [
+                SAOperationalError(
+                    "SSL connection has been closed unexpectedly", None, None
+                ),
+                None,  # second call succeeds
+            ]
+
+            with patch(
+                "superset.mcp_service.auth.get_user_from_request",
+                return_value=fresh_user,
+            ):
+                result = wrapped()
+
+    assert result == "fresh"
+    assert mock_db.session.invalidate.called, "invalidate() must be called on 
SSL error"
+    assert mock_db.session.remove.call_count == 2, (
+        "remove() must be retried after SSL error"
+    )
+
+
 # -- default_user_resolver --
 
 

Reply via email to