bito-code-review[bot] commented on code in PR #40346: URL: https://github.com/apache/superset/pull/40346#discussion_r3292326049
########## superset/mcp_service/saved_query/tool/get_saved_query_info.py: ########## @@ -0,0 +1,129 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Get saved query info FastMCP tool + +This module contains the FastMCP tool for getting detailed information +about a specific saved SQL query. +""" + +import logging +from datetime import datetime, timezone + +from fastmcp import Context +from superset_core.mcp.decorators import tool, ToolAnnotations + +from superset.extensions import event_logger +from superset.mcp_service.mcp_core import ModelGetInfoCore +from superset.mcp_service.saved_query.schemas import ( + GetSavedQueryInfoRequest, + SavedQueryError, + SavedQueryInfo, + serialize_saved_query_object, +) + +logger = logging.getLogger(__name__) + + +@tool( + tags=["discovery"], + class_permission_name="SavedQuery", + annotations=ToolAnnotations( + title="Get saved query info", + readOnlyHint=True, + destructiveHint=False, + ), +) +async def get_saved_query_info( + request: GetSavedQueryInfoRequest, ctx: Context +) -> SavedQueryInfo | SavedQueryError: + """Get saved query details by ID or UUID. + + Returns the full saved query including SQL text, label, database, + schema, and timestamps. + + IMPORTANT FOR LLM CLIENTS: + - Use numeric ID (e.g., 42) or UUID string (e.g., "a1b2c3d4-...") + - To find a saved query ID, use the list_saved_queries tool first + + Example usage: + ```json + { + "identifier": 42 + } + ``` + + Or with UUID: + ```json + { + "identifier": "a1b2c3d4-5678-90ab-cdef-1234567890ab" + } + ``` + """ + await ctx.info( + "Retrieving saved query information: identifier=%s" % (request.identifier,) + ) + + try: + from superset.daos.query import SavedQueryDAO + + with event_logger.log_context(action="mcp.get_saved_query_info.lookup"): + get_tool = ModelGetInfoCore( + dao_class=SavedQueryDAO, + output_schema=SavedQueryInfo, + error_schema=SavedQueryError, + serializer=serialize_saved_query_object, + supports_slug=False, + logger=logger, + ) + + result = get_tool.run_tool(request.identifier) + + if isinstance(result, SavedQueryInfo): + await ctx.info( + "Saved query information retrieved successfully: " + "saved_query_id=%s, label=%s, db_id=%s" + % ( + result.id, + result.label, + result.db_id, + ) + ) + else: + await ctx.warning( + "Saved query retrieval failed: error_type=%s, error=%s" + % (result.error_type, result.error) + ) + + return result + + except Exception as e: Review Comment: <div> <div id="suggestion"> <div id="issue"><b>Inconsistent error handling</b></div> <div id="fix"> Inconsistent exception handling between tools in the same module: `list_saved_queries` raises exceptions (line 159) while `get_saved_query_info` returns `SavedQueryError` responses (line 125). This inconsistency can confuse callers expecting uniform behavior. </div> </div> <small><i>Code Review Run #bd7d73</i></small> </div> --- Should Bito avoid suggestions like this for future reviews? (<a href=https://alpha.bito.ai/home/ai-agents/review-rules>Manage Rules</a>) - [ ] Yes, avoid them ########## tests/unit_tests/mcp_service/query/tool/test_query_tools.py: ########## @@ -0,0 +1,302 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +import logging +from unittest.mock import MagicMock, patch + +import pytest +from fastmcp import Client +from pydantic import ValidationError + +from superset.mcp_service.app import mcp +from superset.mcp_service.query.schemas import ( + ListQueriesRequest, + QueryFilter, +) +from superset.utils import json + +logging.basicConfig(level=logging.DEBUG) +logger = logging.getLogger(__name__) + + +class TestQueryFilterSchema: + """Tests for QueryFilter schema — filterable columns.""" + + def test_invalid_filter_column_rejected(self): + """Columns not in the Literal set must be rejected.""" + with pytest.raises(ValidationError): + QueryFilter(col="not_a_real_column", opr="eq", value="test") + + def test_user_id_is_rejected_as_filter_column(self): + """user_id is an internal field and should not be a filter column.""" + with pytest.raises(ValidationError): + QueryFilter(col="user_id", opr="eq", value=1) + + def test_valid_status_filter_accepted(self): + """status is a valid filter column.""" + f = QueryFilter(col="status", opr="eq", value="success") + assert f.col == "status" + + def test_valid_database_id_filter_accepted(self): + """database_id is a valid filter column.""" + f = QueryFilter(col="database_id", opr="eq", value=1) + assert f.col == "database_id" + + def test_valid_schema_filter_accepted(self): + """schema is a valid filter column.""" + f = QueryFilter(col="schema", opr="eq", value="public") + assert f.col == "schema" + + +def create_mock_query( + query_id: int = 1, + sql: str = "SELECT * FROM table", + status: str = "success", + start_time: float = 1700000000.0, + end_time: float = 1700000001.0, + rows: int = 100, + database_id: int = 1, + schema: str = "public", + tab_name: str = "SQL Lab 1", + error_message: str | None = None, + client_id: str = "abc123", +) -> MagicMock: + """Factory function to create mock query objects with sensible defaults.""" + query = MagicMock() + query.id = query_id + query.sql = sql + query.status = status + query.start_time = start_time + query.end_time = end_time + query.rows = rows + query.database_id = database_id + query.schema = schema + query.tab_name = tab_name + query.error_message = error_message + query.client_id = client_id + query.limit = 1000 + query.progress = 100 + query.changed_on = None + return query + + [email protected] +def mcp_server(): + return mcp + + [email protected](autouse=True) +def mock_auth(): + """Mock authentication for all tests.""" + from unittest.mock import Mock, patch + + with patch("superset.mcp_service.auth.get_user_from_request") as mock_get_user: + mock_user = Mock() + mock_user.id = 1 + mock_user.username = "admin" + mock_get_user.return_value = mock_user + yield mock_get_user + + +@patch("superset.daos.query.QueryDAO.list") [email protected] +async def test_list_queries_basic(mock_list, mcp_server): + """Test basic query listing functionality.""" + query = create_mock_query() + query._mapping = { + "id": query.id, + "sql": query.sql, + "status": query.status, + "start_time": query.start_time, + "database_id": query.database_id, + "schema": query.schema, + } + mock_list.return_value = ([query], 1) + async with Client(mcp_server) as client: + request = ListQueriesRequest(page=1, page_size=10) + result = await client.call_tool( + "list_queries", {"request": request.model_dump()} + ) + assert result.content is not None + data = json.loads(result.content[0].text) + assert data["queries"] is not None + assert len(data["queries"]) == 1 + assert data["queries"][0]["id"] == 1 + assert data["queries"][0]["status"] == "success" + + +@patch("superset.daos.query.QueryDAO.list") [email protected] +async def test_list_queries_with_status_filter(mock_list, mcp_server): + """Test query listing with status filter.""" + query = create_mock_query(status="failed", error_message="Syntax error") + query._mapping = { + "id": query.id, + "sql": query.sql, + "status": query.status, + "error_message": query.error_message, + } + mock_list.return_value = ([query], 1) + async with Client(mcp_server) as client: + request = ListQueriesRequest( + page=1, + page_size=10, + filters=[ + {"col": "status", "opr": "eq", "value": "failed"}, + ], + ) + result = await client.call_tool( + "list_queries", {"request": request.model_dump()} + ) + assert result.content is not None + data = json.loads(result.content[0].text) + assert data["queries"] is not None + assert len(data["queries"]) == 1 + assert data["queries"][0]["status"] == "failed" + + +@patch("superset.daos.query.QueryDAO.list") [email protected] +async def test_list_queries_default_page_size(mock_list, mcp_server): + """Test that default page size is 25 for query history.""" + mock_list.return_value = ([], 0) + async with Client(mcp_server) as client: + result = await client.call_tool("list_queries", {}) + assert result.content is not None + data = json.loads(result.content[0].text) + assert data["page_size"] == 25 + + +def test_list_queries_request_rejects_both_search_and_filters(): + """Cannot use search and filters simultaneously.""" + with pytest.raises(ValidationError): + ListQueriesRequest( + search="test", + filters=[{"col": "status", "opr": "eq", "value": "success"}], + ) + + +@patch("superset.daos.query.QueryDAO.find_by_id") [email protected] +async def test_get_query_info_basic(mock_find, mcp_server): + """Test basic get query info functionality.""" + query = create_mock_query() + mock_find.return_value = query + async with Client(mcp_server) as client: + result = await client.call_tool( + "get_query_info", {"request": {"identifier": 1}} + ) + assert result.content is not None + data = json.loads(result.content[0].text) + assert data["id"] == 1 + assert data["status"] == "success" + assert data["database_id"] == 1 + + +@patch("superset.daos.query.QueryDAO.find_by_id") [email protected] +async def test_get_query_info_not_found(mock_find, mcp_server): + """Test get query info when query does not exist.""" + mock_find.return_value = None + async with Client(mcp_server) as client: + result = await client.call_tool( + "get_query_info", {"request": {"identifier": 999}} + ) + assert result.data["error_type"] == "not_found" + + +@patch("superset.daos.query.QueryDAO.list") [email protected] +async def test_list_queries_empty(mock_list, mcp_server): + """Test query listing returns empty list when no results.""" + mock_list.return_value = ([], 0) + async with Client(mcp_server) as client: + request = ListQueriesRequest(page=1, page_size=10) + result = await client.call_tool( + "list_queries", {"request": request.model_dump()} + ) + assert result.content is not None + data = json.loads(result.content[0].text) + assert data["queries"] == [] + assert data["count"] == 0 + assert data["total_count"] == 0 + + +@patch("superset.daos.query.QueryDAO.list") [email protected] +async def test_list_queries_pagination_info(mock_list, mcp_server): + """Test that pagination info is correctly returned.""" + queries = [create_mock_query(query_id=i) for i in range(1, 4)] + for q in queries: + q._mapping = {"id": q.id, "sql": q.sql, "status": q.status} + mock_list.return_value = (queries, 100) + async with Client(mcp_server) as client: + request = ListQueriesRequest(page=1, page_size=3) + result = await client.call_tool( + "list_queries", {"request": request.model_dump()} + ) + data = json.loads(result.content[0].text) + assert data["total_count"] == 100 + assert data["page_size"] == 3 + assert data["has_next"] is True + assert data["has_previous"] is False + + +@patch("superset.daos.query.QueryDAO.list") [email protected] +async def test_list_queries_default_order_is_start_time_desc(mock_list, mcp_server): + """Test that default ordering is start_time descending.""" + mock_list.return_value = ([], 0) + async with Client(mcp_server) as client: + result = await client.call_tool("list_queries", {}) + assert result.content is not None + mock_list.assert_called_once() + call_kwargs = mock_list.call_args + assert call_kwargs.kwargs.get("order_column") == "start_time" + assert call_kwargs.kwargs.get("order_direction") == "desc" + + +@patch("superset.daos.query.QueryDAO.list") [email protected] +async def test_list_queries_select_columns_projects_fields(mock_list, mcp_server): + """select_columns limits which fields appear in each query result.""" + query = create_mock_query() + query._mapping = {"id": query.id, "status": query.status} + mock_list.return_value = ([query], 1) + async with Client(mcp_server) as client: + request = ListQueriesRequest( + page=1, page_size=10, select_columns=["id", "status"] + ) + result = await client.call_tool( + "list_queries", {"request": request.model_dump()} + ) + data = json.loads(result.content[0].text) + assert data["queries"] is not None + q = data["queries"][0] + assert set(q.keys()) == {"id", "status"} + assert q["id"] == 1 + assert q["status"] == "success" + + [email protected] +async def test_list_queries_invalid_order_column_raises(mcp_server): + """order_column not in SORTABLE_QUERY_COLUMNS must be rejected.""" + request = ListQueriesRequest(page=1, page_size=10, order_column="tab_name") + async with Client(mcp_server) as client: + with pytest.raises(Exception, match="Invalid order_column"): Review Comment: <div> <div id="suggestion"> <div id="issue"><b>Broad exception catch in test</b></div> <div id="fix"> Replace broad `Exception` catch with `ValueError` to match what the implementation raises at `mcp_core.py:205`. This prevents masking unrelated errors. </div> </div> <small><i>Code Review Run #bd7d73</i></small> </div> --- Should Bito avoid suggestions like this for future reviews? (<a href=https://alpha.bito.ai/home/ai-agents/review-rules>Manage Rules</a>) - [ ] Yes, avoid them ########## superset/mcp_service/query/tool/list_queries.py: ########## @@ -0,0 +1,156 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +List queries FastMCP tool + +This module contains the FastMCP tool for listing SQL query history +with filtering, search, and pagination. +""" + +import logging + +from fastmcp import Context +from superset_core.mcp.decorators import tool, ToolAnnotations + +from superset.extensions import event_logger +from superset.mcp_service.mcp_core import ModelListCore +from superset.mcp_service.query.schemas import ( + ALL_QUERY_COLUMNS, + DEFAULT_QUERY_COLUMNS, + ListQueriesRequest, + QueryError, + QueryFilter, + QueryInfo, + QueryList, + serialize_query_object, + SORTABLE_QUERY_COLUMNS, +) + +logger = logging.getLogger(__name__) + +_DEFAULT_LIST_QUERIES_REQUEST = ListQueriesRequest() + + +@tool( + tags=["core"], + class_permission_name="Query", + annotations=ToolAnnotations( + title="List queries", + readOnlyHint=True, + destructiveHint=False, + ), +) +async def list_queries( + request: ListQueriesRequest | None = None, + ctx: Context | None = None, +) -> QueryList | QueryError: + """List SQL query history with filtering and search. + + Returns recent queries executed by the current user (or all queries for + admins), including SQL text, status, timing, and database information. + Results are ordered by start_time descending (most recent first) by default. + + Sortable columns for order_column: id, start_time, end_time, status, + database_id, changed_on + """ + if ctx is None: + raise RuntimeError("FastMCP context is required for list_queries") + + request = request or _DEFAULT_LIST_QUERIES_REQUEST.model_copy(deep=True) + + await ctx.info( + "Listing queries: page=%s, page_size=%s, search=%s" + % ( + request.page, + request.page_size, + request.search, + ) + ) + await ctx.debug( + "Query listing parameters: filters=%s, order_column=%s, " + "order_direction=%s, select_columns=%s" + % ( + request.filters, + request.order_column, + request.order_direction, + request.select_columns, + ) + ) + + try: + from superset.daos.query import QueryDAO + + def _serialize_query(obj: object, cols: list[str] | None) -> QueryInfo | None: + return serialize_query_object(obj) + + list_tool = ModelListCore( + dao_class=QueryDAO, + output_schema=QueryInfo, + item_serializer=_serialize_query, + filter_type=QueryFilter, + default_columns=DEFAULT_QUERY_COLUMNS, + search_columns=["tab_name", "sql"], + list_field_name="queries", + output_list_schema=QueryList, + all_columns=ALL_QUERY_COLUMNS, + sortable_columns=SORTABLE_QUERY_COLUMNS, + logger=logger, + ) + + with event_logger.log_context(action="mcp.list_queries.query"): + result = list_tool.run_tool( + filters=request.filters, + search=request.search, + select_columns=request.select_columns, + order_column=request.order_column or "start_time", + order_direction=request.order_direction, + page=max(request.page - 1, 0), + page_size=request.page_size, + ) + + await ctx.info( + "Queries listed successfully: count=%s, total_count=%s, total_pages=%s" + % ( + len(result.queries) if hasattr(result, "queries") else 0, + getattr(result, "total_count", None), + getattr(result, "total_pages", None), + ) + ) + + columns_to_filter = result.columns_requested + await ctx.debug( + "Applying field filtering via serialization context: columns=%s" + % (columns_to_filter,) + ) + with event_logger.log_context(action="mcp.list_queries.serialization"): + return result.model_dump( + mode="json", + context={"select_columns": columns_to_filter}, + ) + + except Exception as e: + await ctx.error( + "Query listing failed: page=%s, page_size=%s, error=%s, error_type=%s" + % ( + request.page, + request.page_size, + str(e), + type(e).__name__, + ) + ) + raise Review Comment: <div> <div id="suggestion"> <div id="issue"><b>CWE-390: Redundant except-Raise</b></div> <div id="fix"> At line 146, a bare `except Exception` is caught but then immediately re-raised at line 156, which defeats the purpose of the catch block. The error logging is redundant since the `GlobalErrorHandlerMiddleware` will log the exception anyway. Compare with `get_query_info.py` (lines 118-121) which returns a `QueryError` response instead of re-raising, providing a consistent API contract that callers can handle. If re-raising is intended, remove the catch block entirely. (See also: [CWE-390](https://cwe.mitre.org/data/definitions/390.html)) </div> </div> <small><i>Code Review Run #bd7d73</i></small> </div> --- Should Bito avoid suggestions like this for future reviews? (<a href=https://alpha.bito.ai/home/ai-agents/review-rules>Manage Rules</a>) - [ ] Yes, avoid them -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
