codeant-ai-for-open-source[bot] commented on code in PR #40344:
URL: https://github.com/apache/superset/pull/40344#discussion_r3305680763


##########
superset/mcp_service/task/schemas.py:
##########
@@ -0,0 +1,240 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Pydantic schemas for task MCP tools."""
+
+from __future__ import annotations
+
+from datetime import datetime
+from typing import Annotated, Any, Literal
+
+from pydantic import (
+    BaseModel,
+    ConfigDict,
+    Field,
+    field_validator,
+    model_serializer,
+    model_validator,
+    PositiveInt,
+)
+
+from superset.daos.base import ColumnOperator, ColumnOperatorEnum
+from superset.mcp_service.constants import DEFAULT_PAGE_SIZE, MAX_PAGE_SIZE
+from superset.mcp_service.system.schemas import PaginationInfo
+from superset.mcp_service.utils.schema_utils import (
+    parse_json_or_list,
+    parse_json_or_model_list,
+)
+
+DEFAULT_TASK_COLUMNS: list[str] = ["id", "uuid", "task_type", "status", 
"changed_on"]
+ALL_TASK_COLUMNS: list[str] = [
+    "id",
+    "uuid",
+    "task_type",
+    "task_key",
+    "task_name",
+    "status",
+    "scope",
+    "changed_on",
+    "created_on",
+]
+TASK_SORTABLE_COLUMNS: list[str] = ["id", "changed_on", "created_on", "status"]
+
+
+class TaskColumnFilter(ColumnOperator):
+    """Filter object for task listing.
+
+    col: Column to filter on.
+    opr: Operator to use.
+    value: Value to filter by.
+    """
+
+    col: Literal["task_type", "status", "scope"] = Field(
+        ...,
+        description="Column to filter on.",
+    )
+    opr: ColumnOperatorEnum = Field(..., description="Operator to use.")
+    value: str | int | float | bool | list[str | int | float | bool] = Field(
+        ..., description="Value to filter by"
+    )
+
+
+class TaskInfo(BaseModel):
+    id: int | None = Field(None, description="Task ID")
+    uuid: str | None = Field(None, description="Task UUID")
+    task_type: str | None = Field(None, description="Task type (e.g., 
sql_execution)")
+    task_key: str | None = Field(None, description="Task deduplication key")
+    task_name: str | None = Field(None, description="Human-readable task name")
+    status: str | None = Field(None, description="Task status")
+    scope: str | None = Field(None, description="Task scope 
(private/shared/system)")
+    changed_on: str | datetime | None = Field(
+        None, description="Last modification timestamp"
+    )
+    created_on: str | datetime | None = Field(None, description="Creation 
timestamp")
+
+    model_config = ConfigDict(
+        from_attributes=True,
+        ser_json_timedelta="iso8601",
+        populate_by_name=True,
+    )
+
+    @model_serializer(mode="wrap")
+    def _filter_fields_by_context(self, serializer: Any, info: Any) -> 
dict[str, Any]:
+        data = serializer(self)
+        if info.context and isinstance(info.context, dict):
+            select_columns = info.context.get("select_columns")
+            if select_columns:
+                requested_fields = set(select_columns)
+                return {k: v for k, v in data.items() if k in requested_fields}
+        return data
+
+
+class TaskList(BaseModel):
+    tasks: list[TaskInfo]
+    count: int
+    total_count: int
+    page: int
+    page_size: int
+    total_pages: int
+    has_previous: bool
+    has_next: bool
+    columns_requested: list[str] = Field(default_factory=list)
+    columns_loaded: list[str] = Field(default_factory=list)
+    columns_available: list[str] = Field(default_factory=list)
+    sortable_columns: list[str] = Field(default_factory=list)
+    filters_applied: list[TaskColumnFilter] = Field(default_factory=list)
+    pagination: PaginationInfo | None = None
+    timestamp: datetime | None = None
+    model_config = ConfigDict(ser_json_timedelta="iso8601")
+
+
+class ListTasksRequest(BaseModel):
+    """Request schema for list_tasks."""
+
+    filters: Annotated[
+        list[TaskColumnFilter],
+        Field(
+            default_factory=list,
+            description=(
+                "List of filter objects (col, opr, value). "
+                "Filter columns: task_type, status, scope. "
+                "Cannot be used with 'search'."
+            ),
+        ),
+    ]
+    select_columns: Annotated[
+        list[str],
+        Field(
+            default_factory=list,
+            description="Columns to return. Defaults to common columns.",
+        ),
+    ]
+    search: Annotated[
+        str | None,
+        Field(
+            default=None,
+            description=(
+                "Text search string matched against task_type, task_key, "
+                "task_name, status, and scope. "
+                "Cannot be used together with 'filters'."
+            ),
+        ),
+    ]
+    order_column: Annotated[
+        str | None,
+        Field(default=None, description="Column to sort by (default: 
changed_on)"),
+    ]
+    order_direction: Annotated[
+        Literal["asc", "desc"],
+        Field(default="desc", description="Sort direction ('asc' or 'desc')"),
+    ]
+    page: Annotated[
+        PositiveInt,
+        Field(default=1, description="Page number (1-based)"),
+    ]
+    page_size: Annotated[
+        int,
+        Field(
+            default=DEFAULT_PAGE_SIZE,
+            gt=0,
+            le=MAX_PAGE_SIZE,
+            description=f"Items per page (max {MAX_PAGE_SIZE})",
+        ),
+    ]
+
+    @field_validator("filters", mode="before")
+    @classmethod
+    def parse_filters(cls, v: Any) -> list[TaskColumnFilter]:
+        return parse_json_or_model_list(v, TaskColumnFilter, "filters")
+
+    @field_validator("select_columns", mode="before")
+    @classmethod
+    def parse_columns(cls, v: Any) -> list[str]:
+        return parse_json_or_list(v, "select_columns")
+
+    @model_validator(mode="after")
+    def validate_search_and_filters(self) -> "ListTasksRequest":
+        if self.search and self.filters:
+            raise ValueError(
+                "Cannot use both 'search' and 'filters' simultaneously. "
+                "Use 'search' for text matching on task_type/status/scope, or "
+                "'filters' for column-based filtering, but not both."
+            )
+        return self
+
+
+class TaskError(BaseModel):
+    error: str = Field(..., description="Error message")
+    error_type: str = Field(..., description="Error type")
+    timestamp: str | datetime | None = Field(None, description="Error 
timestamp")
+    model_config = ConfigDict(ser_json_timedelta="iso8601")
+
+    @classmethod
+    def create(cls, error: str, error_type: str) -> "TaskError":
+        from datetime import timezone
+
+        return cls(
+            error=error,
+            error_type=error_type,
+            timestamp=datetime.now(timezone.utc),
+        )
+
+
+class GetTaskInfoRequest(BaseModel):
+    """Request schema for get_task_info (ID or UUID lookup)."""
+
+    identifier: Annotated[
+        int | str,
+        Field(description="Task identifier — numeric ID or UUID string"),
+    ]
+
+
+def serialize_task_object(task: Any) -> TaskInfo | None:
+    if not task:
+        return None
+    uuid_val = getattr(task, "uuid", None)
+    return TaskInfo(
+        id=getattr(task, "id", None),
+        uuid=str(uuid_val) if uuid_val is not None else None,
+        task_type=getattr(task, "task_type", None),
+        task_key=getattr(task, "task_key", None),
+        task_name=getattr(task, "task_name", None),
+        status=getattr(task, "status", None),
+        scope=getattr(task, "scope", None),
+        changed_on=getattr(task, "changed_on", None),
+        created_on=getattr(task, "created_on", None),

Review Comment:
   **Suggestion:** `serialize_task_object` forwards `changed_on`/`created_on` 
datetimes as-is, but task timestamps can be naive (no timezone). Returning 
naive datetimes causes ambiguous JSON timestamps and incorrect client-side 
interpretation across timezones. Normalize naive values to UTC (as done in the 
action-log serializer) before building `TaskInfo`. [logic error]
   
   <details>
   <summary><b>Severity Level:</b> Major ⚠️</summary>
   
   ```mdx
   - ❌ MCP `list_tasks` returns naive, timezone-less timestamps.
   - ❌ MCP `get_task_info` also exposes naive timestamps.
   - ⚠️ Cross-timezone clients may misinterpret task times.
   - ⚠️ Inconsistent behavior versus action-log MCP timestamp normalization.
   ```
   </details>
   <details>
   <summary><b>Steps of Reproduction ✅ </b></summary>
   
   ```mdx
   1. Note the Task model inherits `AuditMixinNullable` in 
`superset/models/tasks.py:11`
   (`class Task(CoreTask, AuditMixinNullable, Model):`), which defines 
`created_on` and
   `changed_on` as naive `DateTime` columns with `default=datetime.now` and
   `onupdate=datetime.now` in `superset/models/helpers.py:561-569`, i.e. 
datetimes without
   timezone information.
   
   2. The MCP list endpoint `list_tasks` in
   `superset/mcp_service/task/tool/list_tasks.py:53-115` uses `TaskDAO` to load 
`Task` rows
   and an internal `_serialize` function at line 87 that simply calls
   `serialize_task_object(obj)` from `superset/mcp_service/task/schemas.py`, 
passing through
   the model instances with their naive `created_on`/`changed_on` values.
   
   3. The serializer `serialize_task_object` in
   `superset/mcp_service/task/schemas.py:226-239` builds a `TaskInfo` Pydantic 
model by
   forwarding `changed_on=getattr(task, "changed_on", None)` and 
`created_on=getattr(task,
   "created_on", None)` (lines 238-239) with no timezone normalization, so any 
naive datetime
   from the ORM is preserved as a naive datetime field on the Pydantic model.
   
   4. When `list_tasks` returns the response, it calls `result.model_dump(mode="json",
   context={"select_columns": columns_to_filter})` in
   `superset/mcp_service/task/tool/list_tasks.py:129-133`, which causes Pydantic to
   serialize these naive datetimes to ISO-8601 strings without timezone offsets. Clients
   consuming the MCP responses (for example via the FastMCP integration) therefore see
   ambiguous timestamps (e.g. `"2026-05-26T12:34:56"`) that are interpreted differently
   across timezones. By contrast, the action-log MCP serializer in
   `superset/mcp_service/action_log/schemas.py:8-20` explicitly normalizes a naive `dttm`
   via `dttm.replace(tzinfo=timezone.utc)` before returning it.
   ```
   </details>
   
   [Fix in 
Cursor](https://app.codeant.ai/fix-in-ide?tool=cursor&prompt_id=4b4602bdbd564bb59be772815ff176be&service=github&base_url=https%3A%2F%2Fgithub.com&org=apache&repo=apache%2Fsuperset)
 | [Fix in VSCode 
Claude](https://app.codeant.ai/fix-in-ide?tool=vscode-claude&prompt_id=4b4602bdbd564bb59be772815ff176be&service=github&base_url=https%3A%2F%2Fgithub.com&org=apache&repo=apache%2Fsuperset)
   
   *(Use Cmd/Ctrl + Click for best experience)*
   <details>
   <summary><b>Prompt for AI Agent 🤖 </b></summary>
   
   ```mdx
   This is a comment left during a code review.
   
   **Path:** superset/mcp_service/task/schemas.py
   **Line:** 238:239
   **Comment:**
        *Logic Error: `serialize_task_object` forwards `changed_on`/`created_on`
datetimes as-is, but task timestamps can be naive (no timezone). Returning naive
datetimes causes ambiguous JSON timestamps and incorrect client-side interpretation
across timezones. Normalize naive values to UTC (as done in the action-log
serializer) before building `TaskInfo`.*
   
   Validate the correctness of the flagged issue. If it is correct, how can I resolve
this? If you propose a fix, implement it and keep it concise.
   Once the fix is implemented, also check the other comments on the same PR and ask
the user whether they want the rest of the comments fixed as well. If they say yes,
fetch all the comments, validate their correctness, and implement a minimal fix.
   ```
   </details>
   <a 
href='https://app.codeant.ai/feedback?pr_url=https%3A%2F%2Fgithub.com%2Fapache%2Fsuperset%2Fpull%2F40344&comment_hash=202cb6e0441641d7870e7fe7e4cb91044be9d22c2f46307dc9969a030676c320&reaction=like'>👍</a>
 | <a 
href='https://app.codeant.ai/feedback?pr_url=https%3A%2F%2Fgithub.com%2Fapache%2Fsuperset%2Fpull%2F40344&comment_hash=202cb6e0441641d7870e7fe7e4cb91044be9d22c2f46307dc9969a030676c320&reaction=dislike'>👎</a>



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to