Copilot commented on code in PR #40099:
URL: https://github.com/apache/superset/pull/40099#discussion_r3238256881


##########
superset/mcp_service/chart/chart_helpers.py:
##########
@@ -69,6 +69,89 @@ def get_cached_form_data(form_data_key: str) -> str | None:
         return None
 
 
+def resolve_datasource_engine(datasource_id: Any, datasource_type: str) -> str:
+    """Return the datasource engine name, or ``"base"`` if it cannot be 
resolved."""
+    if not isinstance(datasource_id, (int, str)):
+        return "base"
+    try:
+        from superset.daos.datasource import DatasourceDAO
+        from superset.utils.core import DatasourceType
+
+        datasource = DatasourceDAO.get_datasource(
+            datasource_type=DatasourceType(datasource_type),
+            database_id_or_uuid=datasource_id,
+        )
+        return datasource.database.db_engine_spec.engine
+    except Exception:  # noqa: BLE001
+        logger.debug("Could not resolve engine for datasource %s", 
datasource_id)
+        return "base"
+
+
+def prepare_form_data_for_query(
+    form_data: dict[str, Any],
+    datasource_id: Any,
+    datasource_type: str,
+    extra_form_data: dict[str, Any] | None = None,
+) -> None:
+    """Normalize form_data filters before building a QueryObject payload.
+
+    Explore and legacy viz query construction merge dashboard/native filter 
payloads
+    and split adhoc filters into the concrete ``filters``/``where``/``having``
+    fields consumed by QueryObject. MCP tools that build query payloads 
directly
+    must perform the same normalization before calling QueryContextFactory.
+    """
+    from superset.utils.core import (
+        convert_legacy_filters_into_adhoc,
+        form_data_to_adhoc,
+        merge_extra_filters,
+        simple_filter_to_adhoc,
+        split_adhoc_filters_into_base_filters,
+    )
+
+    if isinstance(form_data.get("adhoc_filters"), list):
+        adhoc_filters = [
+            *(
+                form_data_to_adhoc(form_data, clause)
+                for clause in ("having", "where")
+                if form_data.get(clause)
+            ),
+            *(
+                simple_filter_to_adhoc(filter_, "where")
+                for filter_ in form_data.get("filters") or []
+                if filter_ is not None
+            ),
+            *form_data["adhoc_filters"],
+        ]
+        form_data["adhoc_filters"] = adhoc_filters
+
+    if extra_form_data:
+        form_data["extra_form_data"] = extra_form_data
+    convert_legacy_filters_into_adhoc(form_data)
+    merge_extra_filters(form_data)
+    split_adhoc_filters_into_base_filters(
+        form_data,
+        resolve_datasource_engine(datasource_id, datasource_type),
+    )
+
+
+def apply_form_data_filters_to_query(
+    query: dict[str, Any],
+    form_data: dict[str, Any],
+) -> None:
+    """Copy normalized form_data filter fields into a query payload."""
+    if filters := form_data.get("filters"):
+        query["filters"] = filters
+    else:
+        query.setdefault("filters", [])
+
+    if time_range := form_data.get("time_range"):
+        query["time_range"] = time_range
+    if where := form_data.get("where"):
+        query["where"] = where
+    if having := form_data.get("having"):
+        query["having"] = having

Review Comment:
   `apply_form_data_filters_to_query()` unconditionally assigns `filters`, 
`time_range`, `where`, and `having` onto the target query dict when those keys 
exist in `form_data`. This is fine when building a fresh query payload, but it 
becomes risky when used to mutate an existing saved `query_context` (it can 
overwrite query-level overrides, e.g. secondary queries).
   
   If this helper is intended to be reused for both cases, it likely needs an 
explicit merge/append behavior (or an `overwrite=False` mode) so callers can 
add normalized/extra filters without clobbering per-query settings.
   



##########
superset/mcp_service/chart/tool/get_chart_data.py:
##########
@@ -568,7 +561,15 @@ async def get_chart_data(  # noqa: C901
 
                 # Merge dashboard native filters into query_context's form_data
                 qc_form_data = query_context_json.setdefault("form_data", {})
-                _apply_extra_form_data(qc_form_data, request.extra_form_data)
+                if request.extra_form_data:
+                    prepare_form_data_for_query(
+                        qc_form_data,
+                        query_context_json["datasource"]["id"],
+                        query_context_json["datasource"]["type"],
+                        request.extra_form_data,
+                    )
+                    for query in query_context_json.get("queries", []):
+                        apply_form_data_filters_to_query(query, qc_form_data)

Review Comment:
   In the saved `chart.query_context` path, applying 
`prepare_form_data_for_query()` + `apply_form_data_filters_to_query()` to 
*every* existing query dict can overwrite query-specific fields (e.g. 
`time_range`, `where`, `having`, `filters`) that are already present in 
`query_context_json['queries']`. This can break multi-query charts (e.g. 
mixed/dual-layer charts) where different queries intentionally have different 
filter/time settings.
   
   Consider merging *only* the additional predicates coming from 
`request.extra_form_data` into each query (append to `filters`, AND-join 
`where`/`having`, and avoid clobbering an existing `time_range` unless that’s 
explicitly desired), instead of copying the normalized `form_data` fields 
wholesale onto each query dict.



##########
superset/mcp_service/chart/tool/get_chart_sql.py:
##########
@@ -264,17 +269,13 @@ def _build_query_context_from_form_data(
     # Preprocess adhoc_filters into where/having/filters on form_data so
     # that the QueryObject receives concrete filter clauses.  This mirrors
     # the view-layer call in viz.py:process_query_filters.
-    from superset.utils.core import (
-        merge_extra_filters,
-        split_adhoc_filters_into_base_filters,
-    )
+    from superset.mcp_service.chart.chart_helpers import 
prepare_form_data_for_query
 
     resolved_type_str: str = (
         datasource_type if isinstance(datasource_type, str) else "table"
     )
     engine = _resolve_engine(datasource_id, resolved_type_str)
-    merge_extra_filters(form_data)
-    split_adhoc_filters_into_base_filters(form_data, engine)
+    prepare_form_data_for_query(form_data, datasource_id, resolved_type_str)

Review Comment:
   In `_build_query_context_from_form_data`, the datasource engine is resolved 
via `_resolve_engine(...)`, but `prepare_form_data_for_query(...)` resolves it 
again internally (via `DatasourceDAO.get_datasource`). This introduces an extra 
DB lookup per request.
   
   Consider reusing the already-computed `engine` (e.g., by letting 
`prepare_form_data_for_query` accept an optional engine string) to avoid 
redundant datasource resolution.
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to