This is an automated email from the ASF dual-hosted git repository.
arivero pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git
The following commit(s) were added to refs/heads/master by this push:
new 3f64c257120 fix(mcp): Add database_name as valid filter column for list_datasets (#37865)
3f64c257120 is described below
commit 3f64c25712001f0d7443187b74fe8255f8063619
Author: Amin Ghadersohi <[email protected]>
AuthorDate: Thu Feb 12 10:47:46 2026 -0500
fix(mcp): Add database_name as valid filter column for list_datasets (#37865)
Co-authored-by: Claude Opus 4.6 <[email protected]>
---
superset/daos/dataset.py | 41 +++++++++++++++++--
superset/mcp_service/dataset/schemas.py | 1 +
.../mcp_service/dataset/tool/test_dataset_tools.py | 46 ++++++++++++++++++++++
3 files changed, 85 insertions(+), 3 deletions(-)
diff --git a/superset/daos/dataset.py b/superset/daos/dataset.py
index bca1fb93cab..30476c01eb6 100644
--- a/superset/daos/dataset.py
+++ b/superset/daos/dataset.py
@@ -21,10 +21,12 @@ from datetime import datetime
 from typing import Any, Dict, List
 
 import dateutil.parser
+from sqlalchemy import select
 from sqlalchemy.exc import SQLAlchemyError
+from sqlalchemy.orm import Query
 
 from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn
-from superset.daos.base import BaseDAO
+from superset.daos.base import BaseDAO, ColumnOperator, ColumnOperatorEnum
 from superset.extensions import db
 from superset.models.core import Database
 from superset.models.dashboard import Dashboard
@@ -35,8 +37,10 @@ from superset.views.base import DatasourceFilter
 
 logger = logging.getLogger(__name__)
 
-# Custom filterable fields for datasets
-DATASET_CUSTOM_FIELDS: dict[str, list[str]] = {}
+# Custom filterable fields for datasets (not direct model columns)
+DATASET_CUSTOM_FIELDS: dict[str, list[str]] = {
+    "database_name": ["eq", "like", "ilike"],
+}
 
 
 class DatasetDAO(BaseDAO[SqlaTable]):
@@ -49,6 +53,37 @@ class DatasetDAO(BaseDAO[SqlaTable]):
     base_filter = DatasourceFilter
 
+    @classmethod
+    def apply_column_operators(
+        cls,
+        query: Query,
+        column_operators: list[ColumnOperator] | None = None,
+    ) -> Query:
+        """Override to handle database_name filter via subquery on Database.
+
+        database_name lives on Database, not SqlaTable, so we intercept it
+        here and use a subquery to avoid duplicate joins with DatasourceFilter.
+        """
+        if not column_operators:
+            return query
+
+        remaining_operators: list[ColumnOperator] = []
+        for c in column_operators:
+            if not isinstance(c, ColumnOperator):
+                c = ColumnOperator.model_validate(c)
+            if c.col == "database_name":
+                operator_enum = ColumnOperatorEnum(c.opr)
+                subq = select(Database.id).where(
+                    operator_enum.apply(Database.database_name, c.value)
+                )
+                query = query.filter(SqlaTable.database_id.in_(subq))
+            else:
+                remaining_operators.append(c)
+
+        if remaining_operators:
+            query = super().apply_column_operators(query, remaining_operators)
+        return query
+
     @staticmethod
     def get_database_by_id(database_id: int) -> Database | None:
         try:
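For context, the interception pattern in apply_column_operators can be sketched in isolation: a filter on a column of a related model is rewritten as an IN subquery against the child table's foreign key, so the outer query's FROM clause stays untouched and no duplicate JOIN can conflict with the joins DatasourceFilter already adds. Below is a minimal, self-contained sketch of that pattern in plain SQLAlchemy; Db and Tbl are hypothetical stand-ins, not Superset's Database and SqlaTable models:

# Illustrative sketch only: Db/Tbl are hypothetical stand-ins for
# Superset's Database/SqlaTable models.
from sqlalchemy import Column, ForeignKey, Integer, String, create_engine, select
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()


class Db(Base):
    __tablename__ = "dbs"
    id = Column(Integer, primary_key=True)
    database_name = Column(String)


class Tbl(Base):
    __tablename__ = "tbls"
    id = Column(Integer, primary_key=True)
    table_name = Column(String)
    database_id = Column(Integer, ForeignKey("dbs.id"))


engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add_all(
        [
            Db(id=1, database_name="dynamodb"),
            Db(id=2, database_name="postgres"),
            Tbl(table_name="dynamo_table", database_id=1),
            Tbl(table_name="pg_table", database_id=2),
        ]
    )
    session.commit()

    # Rewrite the related-model filter as an IN subquery on the foreign
    # key, mirroring DatasetDAO.apply_column_operators above: the outer
    # query never joins Db, it only compares database_id against the
    # subquery's result set.
    subq = select(Db.id).where(Db.database_name.ilike("%dynamo%"))
    matches = session.query(Tbl).filter(Tbl.database_id.in_(subq)).all()
    print([t.table_name for t in matches])  # ['dynamo_table']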
diff --git a/superset/mcp_service/dataset/schemas.py b/superset/mcp_service/dataset/schemas.py
index fb7cf5e79cc..b0dad96b592 100644
--- a/superset/mcp_service/dataset/schemas.py
+++ b/superset/mcp_service/dataset/schemas.py
@@ -54,6 +54,7 @@ class DatasetFilter(ColumnOperator):
     col: Literal[
         "table_name",
         "schema",
+        "database_name",
         "owner",
         "favorite",
     ] = Field(
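This one-line schema change is what actually unblocks the tool: DatasetFilter.col is validated by Pydantic against this Literal, so a column missing from the list is rejected while the request is parsed, before the DAO ever runs. A minimal sketch of that validation behavior, assuming Pydantic v2 and using a hypothetical model rather than Superset's DatasetFilter:

# Hypothetical model, not Superset's DatasetFilter; shown only to
# illustrate Literal-based column validation in Pydantic v2.
from typing import Literal

from pydantic import BaseModel, ValidationError


class FilterSketch(BaseModel):
    col: Literal["table_name", "schema", "database_name", "owner", "favorite"]
    opr: str
    value: str


# Accepted now that "database_name" is part of the Literal:
print(FilterSketch(col="database_name", opr="ilike", value="%dynamo%"))

# Columns outside the Literal still fail fast at parse time:
try:
    FilterSketch(col="created_by", opr="eq", value="admin")
except ValidationError as err:
    print(err.errors()[0]["type"])  # 'literal_error' in Pydantic v2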
diff --git a/tests/unit_tests/mcp_service/dataset/tool/test_dataset_tools.py b/tests/unit_tests/mcp_service/dataset/tool/test_dataset_tools.py
index cde7e130e36..e1ec4dd99eb 100644
--- a/tests/unit_tests/mcp_service/dataset/tool/test_dataset_tools.py
+++ b/tests/unit_tests/mcp_service/dataset/tool/test_dataset_tools.py
@@ -934,6 +934,52 @@ async def test_invalid_filter_column_raises(mcp_server):
)
+def test_database_name_filter_accepted():
+    """Test that database_name is accepted as a valid filter column.
+
+    Regression test for TypeError 'encoding without a string argument' when
+    filtering datasets by database_name.
+    """
+    request = ListDatasetsRequest(
+        filters=[{"col": "database_name", "opr": "ilike", "value": "%dynamo%"}],
+        select_columns=["id", "database_name", "table_name"],
+    )
+    assert len(request.filters) == 1
+    assert request.filters[0].col == "database_name"
+    assert request.filters[0].opr.value == "ilike"
+    assert request.filters[0].value == "%dynamo%"
+
+
+@patch("superset.daos.dataset.DatasetDAO.list")
+@pytest.mark.asyncio
+async def test_list_datasets_with_database_name_filter(mock_list, mcp_server):
+    """Test list_datasets with database_name filter via MCP client.
+
+    Regression test: previously database_name was not in the allowed filter
+    columns, causing a Pydantic ValidationError that downstream code could
+    not serialize properly (TypeError: encoding without a string argument).
+    """
+    dataset = create_mock_dataset(
+        dataset_id=5,
+        table_name="dynamo_table",
+        database_name="dynamodb",
+    )
+    mock_list.return_value = ([dataset], 1)
+    async with Client(mcp_server) as client:
+        request = ListDatasetsRequest(
+            filters=[{"col": "database_name", "opr": "ilike", "value": "%dynamo%"}],
+            select_columns=["id", "database_name", "table_name"],
+        )
+        result = await client.call_tool(
+            "list_datasets", {"request": request.model_dump()}
+        )
+        assert result.content is not None
+        data = json.loads(result.content[0].text)
+        assert data["datasets"] is not None
+        assert len(data["datasets"]) == 1
+        assert data["datasets"][0]["database_name"] == "dynamodb"
+
+
 @patch("superset.daos.dataset.DatasetDAO.find_by_id")
 @pytest.mark.asyncio
 async def test_get_dataset_info_includes_columns_and_metrics(mock_info, mcp_server):
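Taken together, clients can now filter datasets by their parent database end to end. A hedged usage sketch mirroring the tests above; the fastmcp Client import and the mcp_server instance are assumptions based on how the test suite invokes the tool:

# Usage sketch mirroring the tests above. Assumptions: Client comes from
# fastmcp (as in the test suite) and mcp_server is an already-configured
# Superset MCP server instance.
import json

from fastmcp import Client  # assumed import, matching the test module


async def list_dynamo_datasets(mcp_server) -> list[dict]:
    async with Client(mcp_server) as client:
        result = await client.call_tool(
            "list_datasets",
            {
                "request": {
                    "filters": [
                        {"col": "database_name", "opr": "ilike", "value": "%dynamo%"}
                    ],
                    "select_columns": ["id", "database_name", "table_name"],
                }
            },
        )
    return json.loads(result.content[0].text)["datasets"]

The payload shape here matches ListDatasetsRequest.model_dump() as exercised in the tests above.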