bito-code-review[bot] commented on code in PR #40346: URL: https://github.com/apache/superset/pull/40346#discussion_r3311912388
########## superset/mcp_service/saved_query/tool/get_saved_query_info.py: ########## @@ -0,0 +1,129 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Get saved query info FastMCP tool + +This module contains the FastMCP tool for getting detailed information +about a specific saved SQL query. +""" + +import logging +from datetime import datetime, timezone + +from fastmcp import Context +from superset_core.mcp.decorators import tool, ToolAnnotations + +from superset.extensions import event_logger +from superset.mcp_service.mcp_core import ModelGetInfoCore +from superset.mcp_service.saved_query.schemas import ( + GetSavedQueryInfoRequest, + SavedQueryError, + SavedQueryInfo, + serialize_saved_query_object, +) + +logger = logging.getLogger(__name__) + + +@tool( + tags=["discovery"], + class_permission_name="SavedQuery", + annotations=ToolAnnotations( + title="Get saved query info", + readOnlyHint=True, + destructiveHint=False, + ), +) +async def get_saved_query_info( + request: GetSavedQueryInfoRequest, ctx: Context +) -> SavedQueryInfo | SavedQueryError: + """Get saved query details by ID or UUID. + + Returns the full saved query including SQL text, label, database, + schema, and timestamps. 
+ + IMPORTANT FOR LLM CLIENTS: + - Use numeric ID (e.g., 42) or UUID string (e.g., "a1b2c3d4-...") + - To find a saved query ID, use the list_saved_queries tool first + + Example usage: + ```json + { + "identifier": 42 + } + ``` + + Or with UUID: + ```json + { + "identifier": "a1b2c3d4-5678-90ab-cdef-1234567890ab" + } + ``` + """ + await ctx.info( + "Retrieving saved query information: identifier=%s" % (request.identifier,) + ) + + try: + from superset.daos.query import SavedQueryDAO + + with event_logger.log_context(action="mcp.get_saved_query_info.lookup"): + get_tool = ModelGetInfoCore( + dao_class=SavedQueryDAO, + output_schema=SavedQueryInfo, + error_schema=SavedQueryError, + serializer=serialize_saved_query_object, + supports_slug=False, + logger=logger, + ) + + result = get_tool.run_tool(request.identifier) + + if isinstance(result, SavedQueryInfo): + await ctx.info( + "Saved query information retrieved successfully: " + "saved_query_id=%s, label=%s, db_id=%s" + % ( + result.id, + result.label, + result.db_id, + ) + ) + else: + await ctx.warning( + "Saved query retrieval failed: error_type=%s, error=%s" + % (result.error_type, result.error) + ) + + return result + + except Exception as e: Review Comment: <!-- Bito Reply --> The error handling in `get_saved_query_info` was updated to return a generic message (`"Failed to get saved query info"`) in the error response, while the full exception details are logged server-side via `ctx.error()`. This change addresses the security concern of exposing raw exception text to clients. The design asymmetry between list tools and get-info tools remains intentional, as it mirrors the established pattern in the codebase. 
**superset/mcp_service/saved_query/tool/get_saved_query_info.py** ``` except Exception as e: await ctx.error( f"Failed to get saved query info: {str(e)}" ) return SavedQueryError( error_type="get_saved_query_info_failed", error="Failed to get saved query info" ) ``` ########## superset/mcp_service/query/tool/list_queries.py: ########## @@ -0,0 +1,156 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +List queries FastMCP tool + +This module contains the FastMCP tool for listing SQL query history +with filtering, search, and pagination. 
+""" + +import logging + +from fastmcp import Context +from superset_core.mcp.decorators import tool, ToolAnnotations + +from superset.extensions import event_logger +from superset.mcp_service.mcp_core import ModelListCore +from superset.mcp_service.query.schemas import ( + ALL_QUERY_COLUMNS, + DEFAULT_QUERY_COLUMNS, + ListQueriesRequest, + QueryError, + QueryFilter, + QueryInfo, + QueryList, + serialize_query_object, + SORTABLE_QUERY_COLUMNS, +) + +logger = logging.getLogger(__name__) + +_DEFAULT_LIST_QUERIES_REQUEST = ListQueriesRequest() + + +@tool( + tags=["core"], + class_permission_name="Query", + annotations=ToolAnnotations( + title="List queries", + readOnlyHint=True, + destructiveHint=False, + ), +) +async def list_queries( + request: ListQueriesRequest | None = None, + ctx: Context | None = None, +) -> QueryList | QueryError: + """List SQL query history with filtering and search. + + Returns recent queries executed by the current user (or all queries for + admins), including SQL text, status, timing, and database information. + Results are ordered by start_time descending (most recent first) by default. 
+ + Sortable columns for order_column: id, start_time, end_time, status, + database_id, changed_on + """ + if ctx is None: + raise RuntimeError("FastMCP context is required for list_queries") + + request = request or _DEFAULT_LIST_QUERIES_REQUEST.model_copy(deep=True) + + await ctx.info( + "Listing queries: page=%s, page_size=%s, search=%s" + % ( + request.page, + request.page_size, + request.search, + ) + ) + await ctx.debug( + "Query listing parameters: filters=%s, order_column=%s, " + "order_direction=%s, select_columns=%s" + % ( + request.filters, + request.order_column, + request.order_direction, + request.select_columns, + ) + ) + + try: + from superset.daos.query import QueryDAO + + def _serialize_query(obj: object, cols: list[str] | None) -> QueryInfo | None: + return serialize_query_object(obj) + + list_tool = ModelListCore( + dao_class=QueryDAO, + output_schema=QueryInfo, + item_serializer=_serialize_query, + filter_type=QueryFilter, + default_columns=DEFAULT_QUERY_COLUMNS, + search_columns=["tab_name", "sql"], + list_field_name="queries", + output_list_schema=QueryList, + all_columns=ALL_QUERY_COLUMNS, + sortable_columns=SORTABLE_QUERY_COLUMNS, + logger=logger, + ) + + with event_logger.log_context(action="mcp.list_queries.query"): + result = list_tool.run_tool( + filters=request.filters, + search=request.search, + select_columns=request.select_columns, + order_column=request.order_column or "start_time", + order_direction=request.order_direction, + page=max(request.page - 1, 0), + page_size=request.page_size, + ) + + await ctx.info( + "Queries listed successfully: count=%s, total_count=%s, total_pages=%s" + % ( + len(result.queries) if hasattr(result, "queries") else 0, + getattr(result, "total_count", None), + getattr(result, "total_pages", None), + ) + ) + + columns_to_filter = result.columns_requested + await ctx.debug( + "Applying field filtering via serialization context: columns=%s" + % (columns_to_filter,) + ) + with 
event_logger.log_context(action="mcp.list_queries.serialization"): + return result.model_dump( + mode="json", + context={"select_columns": columns_to_filter}, + ) + + except Exception as e: + await ctx.error( + "Query listing failed: page=%s, page_size=%s, error=%s, error_type=%s" + % ( + request.page, + request.page_size, + str(e), + type(e).__name__, + ) + ) + raise Review Comment: <!-- Bito Reply --> The `str(e)` concern in `get_saved_query_info` has been addressed by returning a generic message to the client while retaining detailed error information in the server logs. This aligns with secure error-handling practices. The `list_saved_queries` function's catch-log-reraise pattern is intentional and consistent with other list tools like `list_queries.py`, as it provides structured observability for LLM clients. This pattern is not redundant and is part of a broader design choice to maintain consistency across tools. If the intent is to refactor this pattern, it would require a broader cross-tool effort. ########## superset/mcp_service/saved_query/schemas.py: ########## @@ -0,0 +1,261 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +""" +Pydantic schemas for saved query-related responses +""" + +from __future__ import annotations + +from datetime import datetime +from typing import Annotated, Any, Dict, List, Literal + +from pydantic import ( + BaseModel, + ConfigDict, + Field, + field_validator, + model_serializer, + model_validator, + PositiveInt, +) + +from superset.daos.base import ColumnOperator, ColumnOperatorEnum +from superset.mcp_service.constants import DEFAULT_PAGE_SIZE, MAX_PAGE_SIZE +from superset.mcp_service.privacy import filter_user_directory_fields +from superset.mcp_service.system.schemas import PaginationInfo +from superset.mcp_service.utils.schema_utils import ( + parse_json_or_list, + parse_json_or_model_list, +) + +DEFAULT_SAVED_QUERY_COLUMNS = ["id", "label", "db_id", "schema", "uuid"] +SORTABLE_SAVED_QUERY_COLUMNS = [ + "id", + "label", + "db_id", + "schema", + "changed_on", + "created_on", +] +ALL_SAVED_QUERY_COLUMNS = [ + "id", + "label", + "db_id", + "schema", + "uuid", + "sql", + "description", + "changed_on", + "created_on", +] + + +class SavedQueryFilter(ColumnOperator): + """ + Filter object for saved query listing. + col: The column to filter on. Must be one of the allowed filter fields. + opr: The operator to use. Must be one of the supported operators. + value: The value to filter by (type depends on col and opr). 
+ """ + + col: Literal["label", "db_id", "schema"] = Field( + ..., + description="Column to filter on.", + ) + opr: ColumnOperatorEnum = Field( + ..., + description="Operator to use.", + ) + value: str | int | float | bool | List[str | int | float | bool] = Field( + ..., description="Value to filter by (type depends on col and opr)" + ) + + +class SavedQueryInfo(BaseModel): + id: int | None = Field(None, description="Saved query ID") + uuid: str | None = Field(None, description="Saved query UUID") + label: str | None = Field(None, description="Saved query label/name") + sql: str | None = Field(None, description="SQL query text") + db_id: int | None = Field(None, description="Database connection ID") + schema: str | None = Field(None, description="Database schema name") + description: str | None = Field(None, description="User-provided description") + changed_on: str | datetime | None = Field( + None, description="Last modification timestamp" + ) + created_on: str | datetime | None = Field(None, description="Creation timestamp") + model_config = ConfigDict( + from_attributes=True, + ser_json_timedelta="iso8601", + populate_by_name=True, + ) + + @model_serializer(mode="wrap") + def _filter_fields_by_context(self, serializer: Any, info: Any) -> Dict[str, Any]: + data = filter_user_directory_fields(serializer(self)) + + if info.context and isinstance(info.context, dict): + select_columns = info.context.get("select_columns") + if select_columns: + requested_fields = set(select_columns) + return {k: v for k, v in data.items() if k in requested_fields} + + return data + + +class SavedQueryList(BaseModel): + saved_queries: List[SavedQueryInfo] + count: int + total_count: int + page: int + page_size: int + total_pages: int + has_previous: bool + has_next: bool + columns_requested: List[str] = Field( + default_factory=list, + description="Requested columns for the response", + ) + columns_loaded: List[str] = Field( + default_factory=list, + description="Columns that were 
actually loaded for each saved query", + ) + columns_available: List[str] = Field( + default_factory=list, + description="All columns available for selection via select_columns parameter", + ) + sortable_columns: List[str] = Field( + default_factory=list, + description="Columns that can be used with order_column parameter", + ) + filters_applied: List[SavedQueryFilter] = Field( + default_factory=list, + description="List of advanced filter dicts applied to the query.", + ) + pagination: PaginationInfo | None = None + timestamp: datetime | None = None + model_config = ConfigDict(ser_json_timedelta="iso8601") + + +class ListSavedQueriesRequest(BaseModel): + """Request schema for list_saved_queries.""" + + filters: Annotated[ + List[SavedQueryFilter], + Field( + default_factory=list, + description="List of filter objects (column, operator, value). Each " + "filter is an object with 'col', 'opr', and 'value' " + "properties. Cannot be used together with 'search'.", + ), + ] + select_columns: Annotated[ + List[str], + Field( + default_factory=list, + description="List of columns to select. Defaults to common columns if not " + "specified.", + ), + ] + search: Annotated[ + str | None, + Field( + default=None, + description="Text search string to match against saved query fields. 
" + "Cannot be used together with 'filters'.", + ), + ] + order_column: Annotated[ + str | None, Field(default=None, description="Column to order results by") + ] + order_direction: Annotated[ + Literal["asc", "desc"], + Field( + default="desc", description="Direction to order results ('asc' or 'desc')" + ), + ] + page: Annotated[ + PositiveInt, + Field(default=1, description="Page number for pagination (1-based)"), + ] + page_size: Annotated[ + int, + Field( + default=DEFAULT_PAGE_SIZE, + gt=0, + le=MAX_PAGE_SIZE, + description=f"Number of items per page (max {MAX_PAGE_SIZE})", + ), + ] + + @field_validator("filters", mode="before") + @classmethod + def parse_filters(cls, v: Any) -> List[SavedQueryFilter]: + """Accept both JSON string and list of objects.""" + return parse_json_or_model_list(v, SavedQueryFilter, "filters") + + @field_validator("select_columns", mode="before") + @classmethod + def parse_columns(cls, v: Any) -> List[str]: + """Accept JSON array, list, or comma-separated string.""" + return parse_json_or_list(v, "select_columns") + + @model_validator(mode="after") + def validate_search_and_filters(self) -> "ListSavedQueriesRequest": + """Prevent using both search and filters simultaneously.""" + if self.search and self.filters: + raise ValueError( + "Cannot use both 'search' and 'filters' parameters simultaneously. " + "Use either 'search' for text-based searching across multiple fields, " + "or 'filters' for precise column-based filtering, but not both." + ) + return self Review Comment: <!-- Bito Reply --> The suggestion to add `created_by_me` and `owned_by_me` fields to the `ListSavedQueriesRequest` schema was made by the reviewer (Bito) in comment c1. The user (Aminghadersohi) responded that these flags are redundant for the typical case since `list_saved_queries` already scopes results to the current user via `SavedQueryDAO`'s base filter. 
The user also noted that adding these flags would require matching changes to the `list_queries` tool for consistency, and is therefore out of scope for this PR.

-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@superset.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: notifications-unsubscribe@superset.apache.org
For additional commands, e-mail: notifications-help@superset.apache.org
