bito-code-review[bot] commented on code in PR #40304: URL: https://github.com/apache/superset/pull/40304#discussion_r3287207852
########## superset/mcp_service/action_log/schemas.py: ########## @@ -0,0 +1,243 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Pydantic schemas for action-log MCP tools.""" + +from __future__ import annotations + +from datetime import datetime +from typing import Annotated, Any, Literal + +from pydantic import ( + BaseModel, + ConfigDict, + Field, + field_validator, + model_serializer, + model_validator, + PositiveInt, +) + +from superset.daos.base import ColumnOperator, ColumnOperatorEnum +from superset.mcp_service.constants import DEFAULT_PAGE_SIZE, MAX_PAGE_SIZE +from superset.mcp_service.system.schemas import PaginationInfo +from superset.mcp_service.utils.schema_utils import ( + parse_json_or_list, + parse_json_or_model_list, +) + +DEFAULT_LOG_COLUMNS: list[str] = ["id", "action", "user_id", "dttm"] +ALL_LOG_COLUMNS: list[str] = [ + "id", + "action", + "user_id", + "dttm", + "dashboard_id", + "slice_id", + "json", +] +LOG_SORTABLE_COLUMNS: list[str] = ["id", "dttm"] + + +class ActionLogFilter(ColumnOperator): + """Filter object for action-log listing. + + col: Column to filter on. + opr: Operator to use. + value: Value to filter by. 
+ """ + + col: Literal["action", "user_id", "dashboard_id", "slice_id", "dttm"] = Field( + ..., + description="Column to filter on.", + ) + opr: ColumnOperatorEnum = Field(..., description="Operator to use.") + value: str | int | float | bool | list[str | int | float | bool] = Field( + ..., description="Value to filter by" + ) + + +class ActionLogInfo(BaseModel): + id: int | None = Field(None, description="Log entry ID") + action: str | None = Field(None, description="Action name") + user_id: int | None = Field( + None, description="ID of the user who performed the action" + ) + dttm: str | datetime | None = Field(None, description="Timestamp of the action") + dashboard_id: int | None = Field(None, description="Associated dashboard ID") + slice_id: int | None = Field(None, description="Associated chart/slice ID") + json: str | None = Field(None, description="JSON payload of the action") + + model_config = ConfigDict( + from_attributes=True, + ser_json_timedelta="iso8601", + populate_by_name=True, + ) + + def model_post_init(self, __context: Any) -> None: + if isinstance(self.dttm, datetime) and self.dttm.tzinfo is None: + from datetime import timezone + + object.__setattr__(self, "dttm", self.dttm.replace(tzinfo=timezone.utc)) + + @model_serializer(mode="wrap") + def _filter_fields_by_context(self, serializer: Any, info: Any) -> dict[str, Any]: + data = serializer(self) + if info.context and isinstance(info.context, dict): + select_columns = info.context.get("select_columns") + if select_columns: + requested_fields = set(select_columns) + return {k: v for k, v in data.items() if k in requested_fields} + return data + + +class ActionLogList(BaseModel): + action_logs: list[ActionLogInfo] + count: int + total_count: int + page: int + page_size: int + total_pages: int + has_previous: bool + has_next: bool + columns_requested: list[str] = Field(default_factory=list) + columns_loaded: list[str] = Field(default_factory=list) + columns_available: list[str] = 
Field(default_factory=list) + sortable_columns: list[str] = Field(default_factory=list) + filters_applied: list[ActionLogFilter] = Field(default_factory=list) + pagination: PaginationInfo | None = None + timestamp: datetime | None = None + model_config = ConfigDict(ser_json_timedelta="iso8601") + + +class ListActionLogsRequest(BaseModel): + """Request schema for list_action_logs.""" + + filters: Annotated[ + list[ActionLogFilter], + Field( + default_factory=list, + description=( + "List of filter objects (col, opr, value). " + "Filter columns: action, user_id, dashboard_id, slice_id, dttm. " + "Cannot be used with 'search'." + ), + ), + ] + select_columns: Annotated[ + list[str], + Field( + default_factory=list, + description="Columns to return. Defaults to common columns.", + ), + ] + search: Annotated[ + str | None, + Field( + default=None, + description=( + "Text search string matched against action. " + "Cannot be used together with 'filters'." + ), + ), + ] + order_column: Annotated[ + str | None, + Field(default=None, description="Column to sort by (default: dttm)"), + ] + order_direction: Annotated[ + Literal["asc", "desc"], + Field(default="desc", description="Sort direction ('asc' or 'desc')"), + ] + page: Annotated[ + PositiveInt, + Field(default=1, description="Page number (1-based)"), + ] + page_size: Annotated[ + int, + Field( + default=DEFAULT_PAGE_SIZE, + gt=0, + le=MAX_PAGE_SIZE, + description=f"Items per page (max {MAX_PAGE_SIZE})", + ), + ] + + @field_validator("filters", mode="before") + @classmethod + def parse_filters(cls, v: Any) -> list[ActionLogFilter]: + return parse_json_or_model_list(v, ActionLogFilter, "filters") + + @field_validator("select_columns", mode="before") + @classmethod + def parse_columns(cls, v: Any) -> list[str]: + return parse_json_or_list(v, "select_columns") + + @model_validator(mode="after") + def validate_search_and_filters(self) -> "ListActionLogsRequest": + if self.search and self.filters: + raise ValueError( + 
+ "Cannot use both 'search' and 'filters' simultaneously. " + "Use 'search' for text matching on action, or 'filters' for " + "column-based filtering, but not both." + ) + return self + + +class ActionLogError(BaseModel): + error: str = Field(..., description="Error message") + error_type: str = Field(..., description="Error type") + timestamp: str | datetime | None = Field(None, description="Error timestamp") + model_config = ConfigDict(ser_json_timedelta="iso8601") + + @classmethod + def create(cls, error: str, error_type: str) -> "ActionLogError": + from datetime import timezone + + return cls( + error=error, + error_type=error_type, + timestamp=datetime.now(timezone.utc), + ) Review Comment: <div> <div id="suggestion"> <div id="issue"><b>Missing error sanitization validator</b></div> <div id="fix"> ActionLogError is missing the sanitize_for_llm_context validator present in all other error schemas. Error messages could contain unsanitized content when exposed to LLM context. Add the validator to match DashboardError (lines 123-127), ChartError (lines 188-191), and DatasetError (lines 301-304). 
</div> <details> <summary> <b>Code suggestion</b> </summary> <blockquote>Check the AI-generated fix before applying</blockquote> <div id="code"> ``` --- superset/mcp_service/action_log/schemas.py (lines 201-215) --- 201: class ActionLogError(BaseModel): 202: error: str = Field(..., description="Error message") 203: error_type: str = Field(..., description="Error type") 204: timestamp: str | datetime | None = Field(None, description="Error timestamp") 205: model_config = ConfigDict(ser_json_timedelta="iso8601") 206: 207: + @field_validator("error") 208: + @classmethod 209: + def sanitize_error_for_llm_context(cls, value: str) -> str: 210: + """Wrap error text before it is exposed to LLM context.""" 211: + from superset.mcp_service.utils import sanitize_for_llm_context 212: + 213: + return sanitize_for_llm_context(value, field_path=("error",)) 214: + 215: @classmethod 216: def create(cls, error: str, error_type: str) -> "ActionLogError": ``` </div> </details> </div> <small><i>Code Review Run #8033ab</i></small> </div> --- Should Bito avoid suggestions like this for future reviews? (<a href=https://alpha.bito.ai/home/ai-agents/review-rules>Manage Rules</a>) - [ ] Yes, avoid them ########## superset/mcp_service/task/tool/get_task_info.py: ########## @@ -0,0 +1,108 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Get task info MCP tool.""" + +import logging +from datetime import datetime, timezone + +from fastmcp import Context +from superset_core.mcp.decorators import tool, ToolAnnotations + +from superset.extensions import event_logger +from superset.mcp_service.mcp_core import ModelGetInfoCore +from superset.mcp_service.task.schemas import ( + GetTaskInfoRequest, + serialize_task_object, + TaskError, + TaskInfo, +) + +logger = logging.getLogger(__name__) + + +@tool( + tags=["discovery"], + class_permission_name="Task", + annotations=ToolAnnotations( + title="Get task info", + readOnlyHint=True, + destructiveHint=False, + ), +) +async def get_task_info( + request: GetTaskInfoRequest, + ctx: Context, +) -> TaskInfo | TaskError: + """Get details for a single async task by ID or UUID. + + Returns task_type, status, scope, and timestamps for the specified task. + Non-admin users can only retrieve tasks they are subscribed to. + + Use list_tasks to discover task IDs and UUIDs. + + Example usage: + ```json + {"identifier": 42} + ``` + + Or with UUID: + ```json + {"identifier": "a1b2c3d4-5678-90ab-cdef-1234567890ab"} + ``` + """ + await ctx.info("Retrieving task: identifier=%s" % (request.identifier,)) + + try: + from superset.daos.tasks import TaskDAO + + with event_logger.log_context(action="mcp.get_task_info.lookup"): + # ModelGetInfoCore handles int ID and UUID string automatically. + # TaskDAO.base_filter (TaskFilter) enforces subscription-based access. 
+ get_tool = ModelGetInfoCore( + dao_class=TaskDAO, + output_schema=TaskInfo, + error_schema=TaskError, + serializer=serialize_task_object, + supports_slug=False, + logger=logger, + ) + result = get_tool.run_tool(request.identifier) + + if isinstance(result, TaskInfo): + await ctx.info( + "Task retrieved: id=%s, task_type=%s, status=%s" + % (result.id, result.task_type, result.status) + ) + else: + await ctx.warning( + "Task retrieval failed: error_type=%s, error=%s" + % (result.error_type, result.error) + ) + + return result + + except Exception as e: Review Comment: <div> <div id="suggestion"> <div id="issue"><b>Broad exception catch</b></div> <div id="fix"> Broad `except Exception` at line 99 masks unexpected errors that may indicate real bugs. Consider catching specific exceptions (e.g., `DAOFindFailedError`, `SQLAlchemyError`) or re-raising unexpected ones to avoid silent failure on unexpected error types. </div> </div> <small><i>Code Review Run #8033ab</i></small> </div> --- Should Bito avoid suggestions like this for future reviews? (<a href=https://alpha.bito.ai/home/ai-agents/review-rules>Manage Rules</a>) - [ ] Yes, avoid them -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
