This is an automated email from the ASF dual-hosted git repository.
arivero pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git
The following commit(s) were added to refs/heads/master by this push:
new e6d0f97aab fix(mcp): always filter list responses by columns_requested (#37505)
e6d0f97aab is described below
commit e6d0f97aaba428eec1db06140f45a2c5de6b4896
Author: Amin Ghadersohi <[email protected]>
AuthorDate: Wed Jan 28 05:15:19 2026 -0500
fix(mcp): always filter list responses by columns_requested (#37505)
---
superset/mcp_service/chart/tool/list_charts.py | 23 ++++------
.../mcp_service/dashboard/tool/list_dashboards.py | 21 ++++-----
superset/mcp_service/dataset/tool/list_datasets.py | 23 ++++------
.../mcp_service/dataset/tool/test_dataset_tools.py | 53 ++++++++++++++++++++++
4 files changed, 81 insertions(+), 39 deletions(-)
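Each list_* tool previously applied field filtering only when the caller passed
select_columns explicitly, so the default-column case serialized every field,
mostly as null values. The change always feeds the resolved column set into the
serialization context. A minimal sketch of the before/after pattern, reusing
the names from the diff below (result is the tool's list response model):

    # Before: filter only when select_columns was passed explicitly
    if request.select_columns:
        return result.model_dump(
            mode="json", context={"select_columns": request.select_columns}
        )
    return result.model_dump(mode="json")  # defaults: all columns, mostly null

    # After: always filter by the resolved columns_requested
    # (explicit select_columns or the tool's defaults)
    columns_to_filter = result.columns_requested
    return result.model_dump(
        mode="json", context={"select_columns": columns_to_filter}
    )
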
diff --git a/superset/mcp_service/chart/tool/list_charts.py b/superset/mcp_service/chart/tool/list_charts.py
index 9831a8d4ba..37ddbcfc50 100644
--- a/superset/mcp_service/chart/tool/list_charts.py
+++ b/superset/mcp_service/chart/tool/list_charts.py
@@ -137,20 +137,17 @@ async def list_charts(request: ListChartsRequest, ctx: Context) -> ChartList:
% (count, total_pages)
)
- # Apply field filtering via serialization context if select_columns specified
+ # Apply field filtering via serialization context
+ # Always use columns_requested (either explicit select_columns or defaults)
# This triggers ChartInfo._filter_fields_by_context for each chart
- if request.select_columns:
- await ctx.debug(
- "Applying field filtering via serialization context: select_columns=%s"
- % (request.select_columns,)
- )
- # Return dict with context - FastMCP handles serialization
- return result.model_dump(
- mode="json", context={"select_columns": request.select_columns}
- )
-
- # No filtering - return full result as dict
- return result.model_dump(mode="json")
+ columns_to_filter = result.columns_requested
+ await ctx.debug(
+ "Applying field filtering via serialization context: columns=%s"
+ % (columns_to_filter,)
+ )
+ return result.model_dump(
+ mode="json", context={"select_columns": columns_to_filter}
+ )
except Exception as e:
await ctx.error("Failed to list charts: %s" % (str(e),))
raise
diff --git a/superset/mcp_service/dashboard/tool/list_dashboards.py b/superset/mcp_service/dashboard/tool/list_dashboards.py
index ecb3433cef..db0a30da94 100644
--- a/superset/mcp_service/dashboard/tool/list_dashboards.py
+++ b/superset/mcp_service/dashboard/tool/list_dashboards.py
@@ -139,17 +139,12 @@ async def list_dashboards(
% (count, total_pages)
)
- # Apply field filtering via serialization context if select_columns specified
+ # Apply field filtering via serialization context
+ # Always use columns_requested (either explicit select_columns or defaults)
# This triggers DashboardInfo._filter_fields_by_context for each dashboard
- if request.select_columns:
- await ctx.debug(
- "Applying field filtering via serialization context: select_columns=%s"
- % (request.select_columns,)
- )
- # Return dict with context - FastMCP handles serialization
- return result.model_dump(
- mode="json", context={"select_columns": request.select_columns}
- )
-
- # No filtering - return full result as dict
- return result.model_dump(mode="json")
+ columns_to_filter = result.columns_requested
+ await ctx.debug(
+ "Applying field filtering via serialization context: columns=%s"
+ % (columns_to_filter,)
+ )
+ return result.model_dump(mode="json", context={"select_columns": columns_to_filter})
diff --git a/superset/mcp_service/dataset/tool/list_datasets.py b/superset/mcp_service/dataset/tool/list_datasets.py
index 897d7f613d..4d81f13eb0 100644
--- a/superset/mcp_service/dataset/tool/list_datasets.py
+++ b/superset/mcp_service/dataset/tool/list_datasets.py
@@ -148,20 +148,17 @@ async def list_datasets(request: ListDatasetsRequest, ctx: Context) -> DatasetLi
)
)
- # Apply field filtering via serialization context if select_columns specified
+ # Apply field filtering via serialization context
+ # Always use columns_requested (either explicit select_columns or defaults)
# This triggers DatasetInfo._filter_fields_by_context for each dataset
- if request.select_columns:
- await ctx.debug(
- "Applying field filtering via serialization context: select_columns=%s"
- % (request.select_columns,)
- )
- # Return dict with context - FastMCP handles serialization
- return result.model_dump(
- mode="json", context={"select_columns": request.select_columns}
- )
-
- # No filtering - return full result as dict
- return result.model_dump(mode="json")
+ columns_to_filter = result.columns_requested
+ await ctx.debug(
+ "Applying field filtering via serialization context: columns=%s"
+ % (columns_to_filter,)
+ )
+ return result.model_dump(
+ mode="json", context={"select_columns": columns_to_filter}
+ )
except Exception as e:
await ctx.error(
diff --git a/tests/unit_tests/mcp_service/dataset/tool/test_dataset_tools.py b/tests/unit_tests/mcp_service/dataset/tool/test_dataset_tools.py
index 9f16d11b3e..cde7e130e3 100644
--- a/tests/unit_tests/mcp_service/dataset/tool/test_dataset_tools.py
+++ b/tests/unit_tests/mcp_service/dataset/tool/test_dataset_tools.py
@@ -1239,6 +1239,59 @@ class TestDatasetDefaultColumnFiltering:
assert "metrics" in data["columns_available"]
assert "description" in data["columns_available"]
+ @patch("superset.daos.dataset.DatasetDAO.list")
+ @pytest.mark.asyncio
+ async def test_default_columns_filters_actual_response_data(
+ self, mock_list, mcp_server
+ ):
+ """Test that actual dataset items only contain default columns.
+
+ This verifies the fix for the bug where all 40+ columns were returned
+ with null values even when only default columns were requested.
+ See: https://github.com/apache/superset/pull/37213
+ """
+ dataset = create_mock_dataset()
+ mock_list.return_value = ([dataset], 1)
+
+ async with Client(mcp_server) as client:
+ # Empty select_columns = use default columns
+ request = ListDatasetsRequest(page=1, page_size=10, select_columns=[])
+ result = await client.call_tool(
+ "list_datasets", {"request": request.model_dump()}
+ )
+ data = json.loads(result.content[0].text)
+
+ # Get the actual dataset item
+ assert len(data["datasets"]) == 1
+ dataset_item = data["datasets"][0]
+
+ # Verify ONLY default columns are present in the response item
+ expected_keys = {"id", "table_name", "schema_name", "uuid"}
+ actual_keys = set(dataset_item.keys())
+
+ # The response should only contain the default columns, NOT all columns
+ # with null values (which was the bug)
+ assert actual_keys == expected_keys, (
+ f"Expected only default columns {expected_keys}, "
+ f"but got {actual_keys}. "
+ f"Extra columns: {actual_keys - expected_keys}"
+ )
+
+ # Verify non-default columns are NOT present (not even with null values)
+ non_default_columns = [
+ "description",
+ "database_name",
+ "changed_by",
+ "changed_on",
+ "columns",
+ "metrics",
+ ]
+ for col in non_default_columns:
+ assert col not in dataset_item, (
+ f"Non-default column '{col}' should not be in response. "
+ f"This indicates the column filtering is not working."
+ )
+
class TestDatasetSortableColumns:
"""Test sortable columns configuration for dataset tools."""