This is an automated email from the ASF dual-hosted git repository.
jli pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git
The following commit(s) were added to refs/heads/master by this push:
new 1ee14c59931 fix(mcp): improve prompts, resources, and instructions clarity (#37389)
1ee14c59931 is described below
commit 1ee14c599310e2012d9b3037f9abfb84f4453211
Author: Amin Ghadersohi <[email protected]>
AuthorDate: Fri Jan 30 13:25:38 2026 -0700
fix(mcp): improve prompts, resources, and instructions clarity (#37389)
---
superset/mcp_service/app.py | 81 +++---
.../chart/prompts/create_chart_guided.py | 219 +++++++--------
.../mcp_service/chart/resources/chart_configs.py | 293 +++++++--------------
superset/mcp_service/chart/schemas.py | 16 +-
superset/mcp_service/chart/tool/generate_chart.py | 4 -
.../mcp_service/chart/tool/get_chart_preview.py | 3 -
superset/mcp_service/common/schema_discovery.py | 65 ++++-
superset/mcp_service/system/prompts/quickstart.py | 94 +++----
.../system/resources/instance_metadata.py | 74 +++++-
9 files changed, 392 insertions(+), 457 deletions(-)
diff --git a/superset/mcp_service/app.py b/superset/mcp_service/app.py
index 4a8fc4e729f..f177d9e83b3 100644
--- a/superset/mcp_service/app.py
+++ b/superset/mcp_service/app.py
@@ -51,29 +51,27 @@ Available tools:
Dashboard Management:
- list_dashboards: List dashboards with advanced filters (1-based pagination)
- get_dashboard_info: Get detailed dashboard information by ID
-- generate_dashboard: Automatically create a dashboard from datasets with AI
+- generate_dashboard: Create a dashboard from chart IDs
- add_chart_to_existing_dashboard: Add a chart to an existing dashboard
Dataset Management:
- list_datasets: List datasets with advanced filters (1-based pagination)
-- get_dataset_info: Get detailed dataset information by ID
+- get_dataset_info: Get detailed dataset information by ID (includes columns/metrics)
Chart Management:
- list_charts: List charts with advanced filters (1-based pagination)
- get_chart_info: Get detailed chart information by ID
- get_chart_preview: Get a visual preview of a chart with image URL
- get_chart_data: Get underlying chart data in text-friendly format
-- generate_chart: Create a new chart with AI assistance
-- update_chart: Update existing chart configuration
-- update_chart_preview: Update chart and get preview in one operation
+- generate_chart: Create and save a new chart permanently
+- generate_explore_link: Create an interactive explore URL (preferred for exploration)
+- update_chart: Update existing saved chart configuration
+- update_chart_preview: Update cached chart preview without saving
SQL Lab Integration:
-- execute_sql: Execute SQL queries and get results
+- execute_sql: Execute SQL queries and get results (requires database_id)
- open_sql_lab_with_context: Generate SQL Lab URL with pre-filled query
-Explore & Analysis:
-- generate_explore_link: Create pre-configured explore URL with dataset/metrics/filters
-
Schema Discovery:
- get_schema: Get schema metadata for chart/dataset/dashboard (columns, filters)
@@ -82,42 +80,49 @@ System Information:
- health_check: Simple health check tool (takes NO parameters, call without arguments)
Available Resources:
-- instance/metadata: Access instance configuration and metadata
-- chart/templates: Access chart configuration templates
+- instance://metadata: Instance configuration, stats, and available dataset IDs
+- chart://configs: Valid chart configuration examples and best practices
Available Prompts:
- quickstart: Interactive guide for getting started with the MCP service
- create_chart_guided: Step-by-step chart creation wizard
-Common Chart Types (viz_type) and Behaviors:
-
-Interactive Charts (support sorting, filtering, drill-down):
-- table: Standard table view with sorting and filtering
-- pivot_table_v2: Pivot table with grouping and aggregations
-- echarts_timeseries_line: Time series line chart
-- echarts_timeseries_bar: Time series bar chart
-- echarts_timeseries_area: Time series area chart
-- echarts_timeseries_scatter: Time series scatter plot
-- mixed_timeseries: Combined line/bar time series
-
-Common Visualization Types:
-- big_number: Single metric display
-- big_number_total: Total value display
-- pie: Pie chart for proportions
-- echarts_timeseries: Generic time series chart
-- funnel: Funnel chart for conversion analysis
-- gauge_chart: Gauge/speedometer visualization
-- heatmap_v2: Heat map for correlation analysis
-- sankey_v2: Sankey diagram for flow visualization
-- sunburst_v2: Sunburst chart for hierarchical data
-- treemap_v2: Tree map for hierarchical proportions
-- word_cloud: Word cloud visualization
-- world_map: Geographic world map
-- box_plot: Box plot for distribution analysis
-- bubble: Bubble chart for 3-dimensional data
+Recommended Workflows:
+
+To create a chart:
+1. list_datasets -> find a dataset
+2. get_dataset_info(id) -> examine columns and metrics
+3. generate_explore_link(dataset_id, config) -> preview interactively
+4. generate_chart(dataset_id, config, save_chart=True) -> save permanently
+
+To explore data with SQL:
+1. get_instance_info -> find database_id
+2. execute_sql(database_id, sql) -> run query
+3. open_sql_lab_with_context(database_id) -> open SQL Lab UI
+
+generate_explore_link vs generate_chart:
+- Use generate_explore_link for exploration (no permanent chart created)
+- Use generate_chart with save_chart=True only when user wants to save permanently
+
+Chart Types You Can CREATE with generate_chart/generate_explore_link:
+- chart_type="xy", kind="line": Line chart for time series and trends
+- chart_type="xy", kind="bar": Bar chart for category comparison
+- chart_type="xy", kind="area": Area chart for volume visualization
+- chart_type="xy", kind="scatter": Scatter plot for correlation analysis
+- chart_type="table": Data table for detailed views
+- chart_type="table", viz_type="ag-grid-table": Interactive AG Grid table
+
+Time grain for temporal x-axis (time_grain parameter):
+- PT1H (hourly), P1D (daily), P1W (weekly), P1M (monthly), P1Y (yearly)
+
+Chart Types in Existing Charts (viewable via list_charts/get_chart_info):
+- pie, big_number, big_number_total, funnel, gauge_chart
+- echarts_timeseries_line, echarts_timeseries_bar, echarts_timeseries_area
+- pivot_table_v2, heatmap_v2, sankey_v2, sunburst_v2, treemap_v2
+- word_cloud, world_map, box_plot, bubble, mixed_timeseries
Query Examples:
-- List all interactive tables:
+- List all tables:
filters=[{{"col": "viz_type", "opr": "in", "value": ["table",
"pivot_table_v2"]}}]
- List time series charts:
filters=[{{"col": "viz_type", "opr": "sw", "value": "echarts_timeseries"}}]
diff --git a/superset/mcp_service/chart/prompts/create_chart_guided.py b/superset/mcp_service/chart/prompts/create_chart_guided.py
index 6010a71bd40..06bca71bbd0 100644
--- a/superset/mcp_service/chart/prompts/create_chart_guided.py
+++ b/superset/mcp_service/chart/prompts/create_chart_guided.py
@@ -19,175 +19,132 @@
Chart prompts for visualization guidance
"""
-import logging
-
from superset_core.mcp import prompt
-logger = logging.getLogger(__name__)
-
@prompt("create_chart_guided")
async def create_chart_guided_prompt(
chart_type: str = "auto", business_goal: str = "exploration"
) -> str:
"""
- AI-powered chart creation guide following Anthropic's agent design principles.
-
- This prompt implements:
- - Transparency: Clear reasoning at each step
- - Proactive Intelligence: Suggests insights before being asked
- - Context Awareness: Maintains conversational flow
- - Business Focus: Translates data into actionable insights
- - Validation: Verifies choices before proceeding
- - Natural Interaction: Conversational, not configuration-driven
+ Guided chart creation with step-by-step workflow.
Args:
- chart_type: Preferred chart type (auto, line, bar, pie, table, scatter, area)
+ chart_type: Preferred chart type (auto, line, bar, table, scatter, area)
business_goal: Purpose (exploration, reporting, monitoring, presentation)
"""
- # Enhanced chart intelligence with business context
chart_intelligence = {
"line": {
- "description": "Time series visualization for trend analysis",
- "best_for": "Tracking performance over time, identifying patterns",
- "business_value": "Reveals growth trends, seasonality, and
patterns",
+ "description": "Time series trends",
"data_requirements": "Temporal column + continuous metrics",
},
"bar": {
- "description": "Category comparison visualization",
- "best_for": "Ranking, comparisons, and performance by category",
- "business_value": "Identifies top performers, bottlenecks, and
gaps",
+ "description": "Category comparison",
"data_requirements": "Categorical dimensions + aggregatable
metrics",
},
"scatter": {
- "description": "Correlation and relationship analysis",
- "best_for": "Finding relationships, outlier detection, clustering",
- "business_value": "Uncovers hidden correlations and identifies
anomalies",
+ "description": "Correlation analysis",
"data_requirements": "Two continuous variables, optional grouping",
},
"table": {
- "description": "Detailed data exploration and exact values",
- "best_for": "Detailed analysis, data validation, precise values",
- "business_value": "Provides granular insights and detailed
reporting",
+ "description": "Detailed data view",
"data_requirements": "Any combination of dimensions and metrics",
},
"area": {
- "description": "Volume and composition over time",
- "best_for": "Showing cumulative effects, stacked comparisons",
- "business_value": "Visualizes contribution and total volume
trends",
+ "description": "Volume over time",
"data_requirements": "Temporal dimension + stackable metrics",
},
"auto": {
- "description": "AI-powered visualization recommendation",
- "best_for": "When you're not sure what chart type to use",
- "business_value": "Optimizes chart choice based on data
characteristics",
- "data_requirements": "I'll analyze your data and recommend the
best type",
+ "description": "Recommend based on data",
+ "data_requirements": "Any - will analyze columns to determine best
type",
},
}
- # Business context intelligence
- goal_intelligence = {
- "exploration": {
- "approach": "Interactive discovery and pattern finding",
- "features": "Filters, drill-downs, multiple perspectives",
- "outcome": "Uncover hidden insights and generate hypotheses",
- },
- "reporting": {
- "approach": "Clear, professional, and consistent presentation",
- "features": "Clean design, appropriate aggregation, clear labels",
- "outcome": "Reliable, repeatable business reporting",
- },
- "monitoring": {
- "approach": "Real-time tracking with clear thresholds",
- "features": "Alert conditions, trend indicators, key metrics",
- "outcome": "Proactive issue detection and performance tracking",
- },
- "presentation": {
- "approach": "Compelling visual storytelling",
- "features": "Engaging colors, clear messaging,
audience-appropriate detail",
- "outcome": "Persuasive data-driven presentations for stakeholders",
- },
+ goal_context = {
+ "exploration": "interactive discovery with filters and drill-downs",
+ "reporting": "clean, professional presentation with clear labels",
+ "monitoring": "real-time tracking with key metrics highlighted",
+ "presentation": "compelling visual storytelling for stakeholders",
}
selected_chart = chart_intelligence.get(chart_type, chart_intelligence["auto"])
- selected_goal = goal_intelligence.get(
- business_goal, goal_intelligence["exploration"]
- )
-
- return f"""🎯 **AI-Powered Chart Creation Assistant**
-
-I'm your intelligent data visualization partner! Let me help you create charts.
-
-**Your Visualization Goal:**
-📊 **Chart Focus**: {chart_type.title()} - {selected_chart["description"]}
-🎯 **Business Purpose**: {business_goal.title()} - {selected_goal["approach"]}
-💡 **Expected Value**: {selected_chart["business_value"]}
-
----
-
-## 🚀 My Intelligent Approach
-
-### **Phase 1: Data Intelligence** 📊
-I'll automatically analyze your dataset to understand:
-- **Data characteristics** (types, distributions, quality)
-- **Business relationships** (correlations, hierarchies, trends)
-- **Visualization opportunities** (what stories your data can tell)
-- **Performance considerations** (size, complexity, aggregation needs)
+ selected_goal = goal_context.get(business_goal, goal_context["exploration"])
+ valid_kinds = ("line", "bar", "area", "scatter")
+ kind = chart_type if chart_type in valid_kinds else "line"
-*Why this matters: The right chart depends on your data's unique characteristics*
+ return f"""**Guided Chart Creation**
-### **Phase 2: Smart Recommendations** 🧠
-Based on your data analysis, I'll:
-- **Recommend optimal chart types** with confidence scores and reasoning
-- **Suggest meaningful metrics** that align with your business goal
-- **Identify interesting patterns** you might want to highlight
-- **Propose filters** to focus on what matters most
-
-*Why this matters: I'll spot opportunities you might miss and save you time*
-
-### **Phase 3: Intelligent Configuration** ⚙️
-I'll configure your chart with:
-- **Business-appropriate aggregations** (daily, weekly, monthly for time series)
-- **Meaningful labels and formatting** (currency, percentages, readable names)
-- **Performance optimizations** (appropriate limits, caching strategies)
-- **Visual best practices** (colors, scales, legends that enhance understanding)
-
-*Why this matters: Proper configuration makes charts both beautiful and actionable*
-
-### **Phase 4: Validation & Refinement** 🎯
-Before finalizing, I'll:
-- **Verify the chart answers your business question**
-- **Check data quality and completeness**
-- **Suggest improvements** based on visualization best practices
-- **Provide preview** so you can see exactly what you're getting
-
-*Why this matters: Great charts require iteration and validation*
-
----
-
-## 🎬 Let's Begin Your Data Story
-
-I'm ready to be your proactive data exploration partner. Here's how we can start:
-
-**Option 1: Quick Start** ⚡
-Tell me: *"What business question are you trying to answer?"*
-(e.g., "How are our sales trending?" or "Which products perform best?")
-
-**Option 2: Dataset Exploration** 🔍
-I can show you available datasets: `list_datasets`
-Or explore a specific one: `get_dataset_info [dataset_id]`
-
-**Option 3: Visual Inspiration** 🎨
-Browse pre-built chart configurations: `superset://chart/configs` resource
-Perfect for when you want to see examples of great charts!
-
-**Option 4: Autonomous Discovery** 🤖
-Just point me to a dataset and say *"Find something interesting"*
-I'll explore autonomously and surface the most compelling insights!
+Chart type: {chart_type} - {selected_chart["description"]}
+Data needs: {selected_chart["data_requirements"]}
+Goal: {business_goal} - {selected_goal}
---
-💡 **Pro Tip**: Great charts combine business intuition with data analysis!
-
-**What's your data challenge today?** 🚀"""
+## Step-by-Step Workflow
+
+Follow these steps in order:
+
+### Step 1: Find a Dataset
+Call `list_datasets` to see available datasets.
+
+### Step 2: Examine Columns
+Call `get_dataset_info(dataset_id)` to see columns, types, and metrics.
+
+### Step 3: Choose Chart Configuration
+Based on column types:
+- Temporal x-axis + numeric y -> line or area chart
+- Categorical x-axis + numeric y -> bar chart
+- Two numeric columns -> scatter plot
+- Any columns for detail -> table
+
+### Step 4: Create the Chart
+Use `generate_explore_link` for interactive preview (preferred), or
+`generate_chart` with `save_chart=True` to save permanently.
+
+Example XY chart config:
+```json
+{{
+ "dataset_id": <id>,
+ "config": {{
+ "chart_type": "xy",
+ "kind": "{kind}",
+ "x": {{"name": "<column_name>"}},
+ "y": [{{"name": "<column_name>", "aggregate": "SUM"}}],
+ "time_grain": "P1D"
+ }}
+}}
+```
+
+Example table config:
+```json
+{{
+ "dataset_id": <id>,
+ "config": {{
+ "chart_type": "table",
+ "columns": [
+ {{"name": "<dimension_column>"}},
+ {{"name": "<metric_column>", "aggregate": "SUM", "label": "Total"}}
+ ]
+ }}
+}}
+```
+
+### Step 5: Validate Results
+- If you get a column validation error, call `get_dataset_info` to check
+ the exact column names available
+- If data is empty, check if filters are too restrictive
+- If the chart type doesn't suit the data, try a different kind
+
+## Available Aggregations
+SUM, COUNT, AVG, MIN, MAX, COUNT_DISTINCT, STDDEV, VAR, MEDIAN
+
+## Time Grain Options (for temporal x-axis)
+PT1H (hourly), P1D (daily), P1W (weekly), P1M (monthly), P3M (quarterly), P1Y (yearly)
+
+## Additional Options
+- group_by: Add a dimension to split data into series
+- filters: [{{"column": "col", "op": "=", "value": "x"}}]
+- stacked: true (for bar/area charts)
+- legend: {{"show": true, "position": "right"}}
+- x_axis/y_axis: {{"title": "Label", "format": "$,.0f"}}"""
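
[Editor's note] The "Additional Options" listed at the end of the new prompt combine naturally into a single config. A hedged sketch, assuming illustrative column names (order_date, revenue, region, status) that would need verifying via get_dataset_info:

```python
# Sketch: one XY config combining Step 4 with the "Additional Options".
config = {
    "chart_type": "xy",
    "kind": "bar",
    "x": {"name": "order_date"},  # temporal x-axis
    "y": [{"name": "revenue", "aggregate": "SUM", "label": "Revenue"}],
    "group_by": {"name": "region", "label": "Region"},  # one series per region
    "stacked": True,  # stack the per-region series
    "time_grain": "P1M",  # monthly buckets
    "filters": [{"column": "status", "op": "=", "value": "complete"}],
    "legend": {"show": True, "position": "right"},
    "y_axis": {"title": "Revenue", "format": "$,.0f"},
}
```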
diff --git a/superset/mcp_service/chart/resources/chart_configs.py b/superset/mcp_service/chart/resources/chart_configs.py
index 277404f7089..ea40a89fdc6 100644
--- a/superset/mcp_service/chart/resources/chart_configs.py
+++ b/superset/mcp_service/chart/resources/chart_configs.py
@@ -40,67 +40,71 @@ def get_chart_configs_resource() -> str:
- Best practices for each chart type configuration
"""
- # Valid XYChartConfig examples - these match the exact schema
+ # XY chart examples covering all chart kinds and features
xy_chart_configs = {
"line_chart": {
- "description": "Basic line chart for time series analysis",
+ "description": "Line chart with daily time grain",
"config": {
"chart_type": "xy",
"kind": "line",
- "x": {"name": "created_on", "label": "Date Created"},
+ "x": {"name": "order_date", "label": "Date"},
"y": [
{
- "name": "count_metric",
- "aggregate": "COUNT",
- "label": "Total Count",
+ "name": "revenue",
+ "aggregate": "SUM",
+ "label": "Daily Revenue",
}
],
+ "time_grain": "P1D",
},
- "use_cases": [
- "Time series trends",
- "Historical analysis",
- "Growth tracking",
- ],
+ "use_cases": ["Time series trends", "Growth tracking"],
},
"bar_chart": {
- "description": "Bar chart for category comparison",
+ "description": "Bar chart for category comparison with axis
formatting",
"config": {
"chart_type": "xy",
"kind": "bar",
"x": {"name": "category", "label": "Category"},
"y": [{"name": "sales", "aggregate": "SUM", "label": "Total
Sales"}],
- "x_axis": {"title": "Product Categories", "scale": "linear"},
- "y_axis": {
- "title": "Revenue ($)",
- "format": "$,.0f",
- "scale": "linear",
- },
+ "x_axis": {"title": "Product Categories"},
+ "y_axis": {"title": "Revenue ($)", "format": "$,.0f"},
},
- "use_cases": ["Category comparison", "Rankings", "Performance
metrics"],
+ "use_cases": ["Category comparison", "Rankings"],
+ },
+ "stacked_bar": {
+ "description": "Stacked bar chart with group_by dimension",
+ "config": {
+ "chart_type": "xy",
+ "kind": "bar",
+ "x": {"name": "quarter", "label": "Quarter"},
+ "y": [
+ {"name": "revenue", "aggregate": "SUM", "label":
"Revenue"},
+ ],
+ "group_by": {"name": "region", "label": "Region"},
+ "stacked": True,
+ "legend": {"show": True, "position": "right"},
+ },
+ "use_cases": ["Composition analysis", "Regional breakdown"],
},
"multi_metric_line": {
- "description": "Multi-metric line chart with grouping",
+ "description": "Multi-metric line chart with filters and monthly
grain",
"config": {
"chart_type": "xy",
"kind": "line",
- "x": {"name": "date_column", "label": "Date"},
+ "x": {"name": "order_date", "label": "Date"},
"y": [
{"name": "revenue", "aggregate": "SUM", "label":
"Revenue"},
{
- "name": "users",
+ "name": "customer_id",
"aggregate": "COUNT_DISTINCT",
- "label": "Unique Users",
+ "label": "Unique Customers",
},
],
- "group_by": {"name": "region", "label": "Region"},
- "legend": {"show": True, "position": "right"},
+ "time_grain": "P1M",
+ "legend": {"show": True, "position": "top"},
"filters": [{"column": "status", "op": "=", "value":
"active"}],
},
- "use_cases": [
- "Multi-dimensional analysis",
- "Regional comparisons",
- "KPI tracking",
- ],
+ "use_cases": ["KPI tracking", "Multi-dimensional analysis"],
},
"scatter_plot": {
"description": "Scatter plot for correlation analysis",
@@ -108,7 +112,7 @@ def get_chart_configs_resource() -> str:
"chart_type": "xy",
"kind": "scatter",
"x": {
- "name": "advertising_spend",
+ "name": "ad_spend",
"aggregate": "AVG",
"label": "Avg Ad Spend",
},
@@ -119,56 +123,44 @@ def get_chart_configs_resource() -> str:
"label": "Avg Conversion Rate",
}
],
- "group_by": {"name": "campaign_type", "label": "Campaign
Type"},
- "x_axis": {"title": "Average Advertising Spend", "format":
"$,.0f"},
- "y_axis": {"title": "Conversion Rate", "format": ".2%"},
+ "group_by": {"name": "campaign_type", "label": "Campaign"},
+ "x_axis": {"format": "$,.0f"},
+ "y_axis": {"format": ".2%"},
},
- "use_cases": [
- "Correlation analysis",
- "Outlier detection",
- "Performance relationships",
- ],
+ "use_cases": ["Correlation analysis", "Outlier detection"],
},
- "area_chart": {
- "description": "Area chart for volume visualization",
+ "stacked_area": {
+ "description": "Stacked area chart for volume composition over
time",
"config": {
"chart_type": "xy",
"kind": "area",
- "x": {"name": "month", "label": "Month"},
- "y": [
- {"name": "signups", "aggregate": "SUM", "label": "Monthly
Signups"}
- ],
- "filters": [
- {"column": "year", "op": ">=", "value": 2023},
- {"column": "active", "op": "=", "value": True},
- ],
+ "x": {"name": "order_date", "label": "Date"},
+ "y": [{"name": "signups", "aggregate": "SUM", "label":
"Signups"}],
+ "group_by": {"name": "channel", "label": "Channel"},
+ "stacked": True,
+ "time_grain": "P1W",
},
- "use_cases": ["Volume trends", "Cumulative metrics", "Stacked
comparisons"],
+ "use_cases": ["Volume trends", "Channel attribution"],
},
}
- # Valid TableChartConfig examples - these match the exact schema
+ # Table chart examples
table_chart_configs = {
"basic_table": {
- "description": "Basic data table with multiple columns",
+ "description": "Standard table with dimensions and aggregated
metrics",
"config": {
"chart_type": "table",
"columns": [
- {"name": "name", "label": "Customer Name"},
- {"name": "email", "label": "Email Address"},
+ {"name": "customer_name", "label": "Customer"},
{"name": "orders", "aggregate": "COUNT", "label": "Total
Orders"},
{"name": "revenue", "aggregate": "SUM", "label": "Total
Revenue"},
],
"sort_by": ["Total Revenue"],
},
- "use_cases": [
- "Detailed data views",
- "Customer lists",
- "Transaction records",
- ],
+ "use_cases": ["Detail views", "Customer lists"],
},
"aggregated_table": {
- "description": "Table with aggregated metrics and filters",
+ "description": "Table with multiple aggregations and filters",
"config": {
"chart_type": "table",
"columns": [
@@ -190,171 +182,64 @@ def get_chart_configs_resource() -> str:
},
],
"filters": [
- {"column": "sale_date", "op": ">=", "value": "2024-01-01"},
{"column": "status", "op": "!=", "value": "cancelled"},
],
- "sort_by": ["Total Sales", "Sales Region"],
+ "sort_by": ["Total Sales"],
},
- "use_cases": ["Summary reports", "Regional analysis", "Performance
tables"],
+ "use_cases": ["Summary reports", "Regional analysis"],
},
- }
-
- # Schema reference for developers
- schema_reference = {
- "ChartConfig": {
- "description": "Union type - XYChartConfig or TableChartConfig by
type",
- "discriminator": "chart_type",
- "types": ["xy", "table"],
- },
- "XYChartConfig": {
- "required_fields": ["chart_type", "x", "y"],
- "optional_fields": [
- "kind",
- "group_by",
- "x_axis",
- "y_axis",
- "legend",
- "filters",
- ],
- "chart_type": "xy",
- "kind_options": ["line", "bar", "area", "scatter"],
- "validation_rules": [
- "All column labels must be unique across x, y, and group_by",
- "Y-axis must have at least one column",
- "Column names must match pattern:
^[a-zA-Z0-9_][a-zA-Z0-9_\\s\\-\\.]*$",
- ],
- },
- "TableChartConfig": {
- "required_fields": ["chart_type", "columns"],
- "optional_fields": ["filters", "sort_by"],
- "chart_type": "table",
- "validation_rules": [
- "Must have at least one column",
- "All column labels must be unique",
- "Column names must match pattern:
^[a-zA-Z0-9_][a-zA-Z0-9_\\s\\-\\.]*$",
- ],
- },
- "ColumnRef": {
- "required_fields": ["name"],
- "optional_fields": ["label", "dtype", "aggregate"],
- "aggregate_options": [
- "SUM",
- "COUNT",
- "AVG",
- "MIN",
- "MAX",
- "COUNT_DISTINCT",
- "STDDEV",
- "VAR",
- "MEDIAN",
- "PERCENTILE",
- ],
- "validation_rules": [
- "Name cannot be empty and must follow pattern",
- "Labels are HTML-escaped to prevent XSS",
- "Aggregates are validated against allowed functions",
- ],
- },
- "FilterConfig": {
- "required_fields": ["column", "op", "value"],
- "operator_options": ["=", ">", "<", ">=", "<=", "!="],
- "value_types": ["string", "number", "boolean"],
- "validation_rules": [
- "Column names are sanitized to prevent injection",
- "Values are checked for malicious patterns",
- "String values are HTML-escaped",
+ "ag_grid_table": {
+ "description": "Interactive AG Grid table with advanced features",
+ "config": {
+ "chart_type": "table",
+ "viz_type": "ag-grid-table",
+ "columns": [
+ {"name": "product_name", "label": "Product"},
+ {"name": "category", "label": "Category"},
+ {"name": "quantity", "aggregate": "SUM", "label": "Qty
Sold"},
+ {"name": "revenue", "aggregate": "SUM", "label":
"Revenue"},
+ ],
+ },
+ "use_cases": [
+ "Interactive exploration",
+ "Large datasets with client-side sorting/filtering",
],
},
- "AxisConfig": {
- "optional_fields": ["title", "scale", "format"],
- "scale_options": ["linear", "log"],
- "format_examples": ["$,.2f", ".2%", ",.0f", ".1f"],
- },
- "LegendConfig": {
- "optional_fields": ["show", "position"],
- "show_default": True,
- "position_options": ["top", "bottom", "left", "right"],
- "position_default": "right",
- },
}
- # Best practices for each configuration type
+ # Best practices
best_practices = {
"xy_charts": [
- "Use descriptive labels for axes and metrics",
- "Choose appropriate aggregation functions for your data",
- "Limit the number of Y-axis metrics (3-5 maximum)",
- "Use filters to focus on relevant data",
- "Configure axis formatting for better readability",
- "Consider grouping when comparing categories",
- "Use chart kinds: line for trends, bar for comparisons, scatter
plots",
+ "Use time_grain for temporal x-axis columns (P1D, P1W, P1M, P1Y)",
+ "Limit Y-axis metrics to 3-5 maximum for readability",
+ "Use group_by to split data into series for comparison",
+ "Use stacked=true for bar/area charts showing composition",
+ "Configure axis format for readability ($,.0f for currency, .2%
for pct)",
],
"table_charts": [
- "Include essential columns only to avoid clutter",
- "Use meaningful column labels",
- "Apply sorting to highlight important data",
- "Use filters to limit result sets",
- "Mix dimensions and aggregated metrics appropriately",
- "Ensure unique labels to avoid conflicts",
- "Consider performance with large datasets",
+ "Include only essential columns to avoid clutter",
+ "Use meaningful labels different from raw column names",
+ "Apply sort_by to highlight important data",
+ "Use ag-grid-table viz_type for large interactive datasets",
],
"general": [
- "Always specify chart_type as the first field",
- "Use consistent naming conventions for columns",
- "Validate column names exist in your dataset",
- "Test configurations with actual data",
- "Consider caching for frequently accessed charts",
- "Apply security best practices - avoid user input in column names",
+ "Always verify column names with get_dataset_info before charting",
+ "Use generate_explore_link for preview, generate_chart for saving",
+ "Each column label must be unique across the entire configuration",
+ "Column names must match: ^[a-zA-Z0-9_][a-zA-Z0-9_ \\-\\.]*$",
],
}
- # Common patterns and examples
- common_patterns = {
- "time_series": {
- "description": "Standard time-based analysis",
- "x_column_types": ["date", "datetime", "timestamp"],
- "recommended_aggregations": ["SUM", "COUNT", "AVG"],
- "best_chart_types": ["line", "area", "bar"],
- },
- "categorical_analysis": {
- "description": "Comparing discrete categories",
- "x_column_types": ["string", "category", "enum"],
- "recommended_aggregations": ["SUM", "COUNT", "COUNT_DISTINCT",
"AVG"],
- "best_chart_types": ["bar", "table"],
- },
- "correlation_analysis": {
- "description": "Finding relationships between variables",
- "requirements": ["Two numerical metrics"],
- "recommended_aggregations": ["AVG", "SUM", "MEDIAN"],
- "best_chart_types": ["scatter"],
- },
- }
-
resource_data = {
"xy_chart_configs": xy_chart_configs,
"table_chart_configs": table_chart_configs,
- "schema_reference": schema_reference,
"best_practices": best_practices,
- "common_patterns": common_patterns,
- "metadata": {
- "version": "1.0",
- "schema_version": "ChartConfig v1.0",
- "last_updated": "2025-08-07",
- "usage_notes": [
- "All examples are valid ChartConfig objects that pass
validation",
- "Copy these configurations directly into generate_chart
requests",
- "Modify column names and labels to match your actual dataset",
- "Test configurations with get_dataset_info to verify columns",
- "All examples follow security best practices and input
validation",
- ],
- "validation_info": [
- "Column names must match:
^[a-zA-Z0-9_][a-zA-Z0-9_\\s\\-\\.]*$",
- "Labels are automatically HTML-escaped for security",
- "Filter values are sanitized to prevent injection attacks",
- "All field lengths are validated against schema limits",
- "Duplicate labels are automatically detected and rejected",
- ],
- },
+ "usage_notes": [
+ "All examples are valid ChartConfig objects that pass validation",
+ "Modify column names and labels to match your actual dataset",
+ "Use get_dataset_info to verify column names before charting",
+ "For complete schema details, see the generate_chart tool
parameters",
+ ],
}
from superset.utils import json
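
[Editor's note] Since this resource is served as a JSON string, a client can parse it and adapt one of the example configs directly. A minimal sketch using the structure defined above (xy_chart_configs -> line_chart -> config); the replacement column names are placeholders:

```python
import json  # stdlib json; the resource payload is plain JSON


def config_from_template(resource_json: str) -> dict:
    """Start from the line_chart example above and swap in real columns."""
    data = json.loads(resource_json)
    config = data["xy_chart_configs"]["line_chart"]["config"]
    # Placeholder names; verify real columns with get_dataset_info first.
    config["x"]["name"] = "my_date_column"
    config["y"][0]["name"] = "my_metric_column"
    return config
```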
diff --git a/superset/mcp_service/chart/schemas.py b/superset/mcp_service/chart/schemas.py
index 928858aea12..b813bc4ebc5 100644
--- a/superset/mcp_service/chart/schemas.py
+++ b/superset/mcp_service/chart/schemas.py
@@ -848,9 +848,7 @@ class GenerateChartRequest(QueryCacheControl):
default=True,
description="Whether to generate a preview image",
)
- preview_formats: List[
- Literal["url", "interactive", "ascii", "vega_lite", "table", "base64"]
- ] = Field(
+ preview_formats: List[Literal["url", "ascii", "vega_lite", "table"]] = Field(
default_factory=lambda: ["url"],
description="List of preview formats to generate",
)
@@ -896,9 +894,7 @@ class UpdateChartRequest(QueryCacheControl):
default=True,
description="Whether to generate a preview after updating",
)
- preview_formats: List[
- Literal["url", "interactive", "ascii", "vega_lite", "table", "base64"]
- ] = Field(
+ preview_formats: List[Literal["url", "ascii", "vega_lite", "table"]] = Field(
default_factory=lambda: ["url"],
description="List of preview formats to generate",
)
@@ -973,9 +969,7 @@ class UpdateChartPreviewRequest(FormDataCacheControl):
default=True,
description="Whether to generate a preview after updating",
)
- preview_formats: List[
- Literal["url", "interactive", "ascii", "vega_lite", "table", "base64"]
- ] = Field(
+ preview_formats: List[Literal["url", "ascii", "vega_lite", "table"]] = Field(
default_factory=lambda: ["url"],
description="List of preview formats to generate",
)
@@ -1063,11 +1057,11 @@ class GetChartPreviewRequest(QueryCacheControl):
"""Request for chart preview with cache control."""
identifier: int | str = Field(description="Chart identifier (ID, UUID)")
- format: Literal["url", "ascii", "table", "base64", "vega_lite"] = Field(
+ format: Literal["url", "ascii", "table", "vega_lite"] = Field(
default="url",
description=(
"Preview format: 'url' for image URL, 'ascii' for text art, "
- "'table' for data table, 'base64' for embedded image, "
+ "'table' for data table, "
"'vega_lite' for interactive JSON specification"
),
)
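
[Editor's note] Narrowing the Literal list is enough for Pydantic to reject the removed formats at request-validation time. An illustrative standalone model mirroring the field above (not the actual Superset class):

```python
from typing import List, Literal

from pydantic import BaseModel, Field, ValidationError


class PreviewOptions(BaseModel):
    # Illustrative mirror of the narrowed field, not GenerateChartRequest itself.
    preview_formats: List[Literal["url", "ascii", "vega_lite", "table"]] = Field(
        default_factory=lambda: ["url"],
        description="List of preview formats to generate",
    )


PreviewOptions(preview_formats=["url", "table"])  # still accepted

try:
    PreviewOptions(preview_formats=["base64"])  # removed format is now rejected
except ValidationError as err:
    print(err.errors()[0]["loc"])  # ('preview_formats', 0)
```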
diff --git a/superset/mcp_service/chart/tool/generate_chart.py b/superset/mcp_service/chart/tool/generate_chart.py
index 9cd6e622584..7b7f3a17895 100644
--- a/superset/mcp_service/chart/tool/generate_chart.py
+++ b/superset/mcp_service/chart/tool/generate_chart.py
@@ -369,10 +369,6 @@ async def generate_chart( # noqa: C901
await ctx.debug(
"Processing preview format: format=%s" % (format_type,)
)
- # Skip base64 format - we never return base64
- if format_type == "base64":
- logger.info("Skipping base64 format - not supported")
- continue
if chart_id:
# For saved charts, use the existing preview generation
diff --git a/superset/mcp_service/chart/tool/get_chart_preview.py b/superset/mcp_service/chart/tool/get_chart_preview.py
index 2b3ecf353e7..9e001540396 100644
--- a/superset/mcp_service/chart/tool/get_chart_preview.py
+++ b/superset/mcp_service/chart/tool/get_chart_preview.py
@@ -85,9 +85,6 @@ class URLPreviewStrategy(PreviewFormatStrategy):
)
-# Base64 preview support removed - we never return base64 data
-
-
class ASCIIPreviewStrategy(PreviewFormatStrategy):
"""Generate ASCII art preview."""
diff --git a/superset/mcp_service/common/schema_discovery.py b/superset/mcp_service/common/schema_discovery.py
index 21da8028735..530d26c3875 100644
--- a/superset/mcp_service/common/schema_discovery.py
+++ b/superset/mcp_service/common/schema_discovery.py
@@ -119,6 +119,63 @@ def _get_sqlalchemy_type_name(col_type: Any) -> str:
return "str" # Default fallback
+# Descriptions for common model columns that SQLAlchemy models don't document.
+# Used as a fallback when the model column has no doc/comment attribute.
+_COLUMN_DESCRIPTIONS: dict[str, str] = {
+ # Common across models
+ "id": "Unique integer identifier",
+ "uuid": "Unique UUID identifier",
+ "created_on": "Timestamp when the resource was created",
+ "changed_on": "Timestamp when the resource was last modified",
+ "created_by_fk": "User ID of the creator",
+ "changed_by_fk": "User ID of the last modifier",
+ "description": "User-provided description text",
+ "cache_timeout": "Cache timeout override in seconds",
+ "perm": "Permission string for access control",
+ "schema_perm": "Schema-level permission string",
+ "catalog_perm": "Catalog-level permission string",
+ "is_managed_externally": "Whether managed by an external system",
+ "external_url": "URL of the external management system",
+ "certified_by": "Name of the person who certified this resource",
+ "certification_details": "Details about the certification",
+ # Chart-specific
+ "slice_name": "Chart display name",
+ "datasource_id": "ID of the underlying dataset",
+ "datasource_type": "Type of data source (e.g., table)",
+ "viz_type": "Visualization type (e.g., echarts_timeseries_line, table)",
+ "params": "JSON string of chart parameters/configuration",
+ "query_context": "JSON string of the query context for data fetching",
+ "last_saved_at": "Timestamp of the last explicit save",
+ "last_saved_by_fk": "User ID who last saved this chart",
+ # Dataset-specific
+ "table_name": "Name of the database table or view",
+ "schema": "Database schema name",
+ "catalog": "Database catalog name",
+ "database_id": "ID of the database connection",
+ "sql": "Custom SQL expression (for virtual datasets)",
+ "main_dttm_col": "Primary datetime column for time-series queries",
+ "is_sqllab_view": "Whether this dataset was created from SQL Lab",
+ "template_params": "Jinja template parameters as JSON",
+ "extra": "Extra configuration as JSON",
+ "filter_select_enabled": "Whether filter select is enabled",
+ "normalize_columns": "Whether to normalize column names",
+ "always_filter_main_dttm": "Whether to always filter on the main datetime
column",
+ "fetch_values_predicate": "SQL predicate for fetching filter values",
+ "default_endpoint": "Default endpoint URL",
+ "offset": "Row offset for queries",
+ "is_featured": "Whether this dataset is featured",
+ "currency_code_column": "Column containing currency codes",
+ # Dashboard-specific
+ "dashboard_title": "Dashboard display title",
+ "slug": "URL-friendly identifier for the dashboard",
+ "published": "Whether the dashboard is published and visible",
+ "position_json": "JSON layout of dashboard components",
+ "json_metadata": "JSON metadata including filters and settings",
+ "css": "Custom CSS for the dashboard",
+ "theme_id": "Theme ID for dashboard styling",
+}
+
+
def get_columns_from_model(
model_cls: Type[Any],
default_columns: list[str],
@@ -141,8 +198,12 @@ def get_columns_from_model(
for col in mapper.columns:
col_name = col.key
col_type = _get_sqlalchemy_type_name(col.type)
- # Get description from column doc or comment
- description = getattr(col, "doc", None) or getattr(col, "comment", None)
+ # Get description from column doc, comment, or fallback mapping
+ description = (
+ getattr(col, "doc", None)
+ or getattr(col, "comment", None)
+ or _COLUMN_DESCRIPTIONS.get(col_name)
+ )
columns.append(
ColumnMetadata(
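
[Editor's note] The fallback in the patched get_columns_from_model is plain or-chaining: an explicit doc wins, then a database-level comment, then the new static mapping. A runnable sketch with a stand-in column object:

```python
from types import SimpleNamespace

# Stand-in for a SQLAlchemy column that has neither doc nor comment.
col = SimpleNamespace(key="viz_type", doc=None, comment=None)

_COLUMN_DESCRIPTIONS = {
    "viz_type": "Visualization type (e.g., echarts_timeseries_line, table)",
}

# Same chain as the patch: explicit doc, then DB comment, then the mapping.
description = (
    getattr(col, "doc", None)
    or getattr(col, "comment", None)
    or _COLUMN_DESCRIPTIONS.get(col.key)
)
print(description)  # Visualization type (e.g., echarts_timeseries_line, table)
```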
diff --git a/superset/mcp_service/system/prompts/quickstart.py b/superset/mcp_service/system/prompts/quickstart.py
index 3955cbc594d..ec56400b3a4 100644
--- a/superset/mcp_service/system/prompts/quickstart.py
+++ b/superset/mcp_service/system/prompts/quickstart.py
@@ -19,13 +19,9 @@
System prompts for general guidance
"""
-import logging
-
from flask import current_app
from superset_core.mcp import prompt
-logger = logging.getLogger(__name__)
-
def _get_app_name() -> str:
"""Get the application name from Flask config."""
@@ -43,61 +39,57 @@ async def quickstart_prompt(
"""
Guide new users through their first experience with the platform.
- This prompt helps users:
- 1. Understand what data is available
- 2. Create their first visualization
- 3. Build a simple dashboard
- 4. Learn key Superset concepts
-
Args:
user_type: Type of user (analyst, executive, developer)
focus_area: Area of interest (sales, marketing, operations, general)
"""
- # Build personalized prompt based on user type
- intro_messages = {
- "analyst": "I see you're an analyst. Let's explore the data and build
some "
- "detailed visualizations.",
- "executive": "Welcome! Let's create a high-level dashboard with key
business "
- "metrics.",
- "developer": "Great to have a developer here! Let's explore both the
UI and "
- "API capabilities.",
- }
-
- focus_examples = {
- "sales": "Since you're interested in sales, we'll focus on revenue,
customer, "
- "and product metrics.",
- "marketing": "For marketing analytics, we'll look at campaigns,
conversions, "
- "and customer acquisition.",
- "operations": "Let's explore operational efficiency, inventory, and
process "
- "metrics.",
- "general": "We'll explore various datasets to find what's most
relevant to "
- "you.",
- }
-
- intro = intro_messages.get(user_type, intro_messages["analyst"])
- focus = focus_examples.get(focus_area, focus_examples["general"])
app_name = _get_app_name()
- return f"""Welcome to {app_name}! I'll guide you through creating your
first
- dashboard.
+ # Workflow varies by user type
+ workflows = {
+ "analyst": f"""**Workflow for Analysts:**
+
+1. Call `get_instance_info` to see what's available in this {app_name} instance
+2. Call `list_datasets` to find datasets relevant to {focus_area}
+3. Call `get_dataset_info(id)` to examine columns and metrics
+4. Call `generate_explore_link` to create interactive chart previews
+5. Iterate on chart configuration until the visualization answers your question
+6. Call `generate_chart(save_chart=True)` to save charts you want to keep
+7. Call `generate_dashboard` with your saved chart IDs to build a dashboard""",
+ "executive": f"""**Workflow for Executives:**
+
+1. Call `get_instance_info` to see available dashboards and charts
+2. Call `list_dashboards` to find existing dashboards relevant to {focus_area}
+3. Call `get_dashboard_info(id)` to view dashboard details and chart list
+4. To create a new KPI dashboard:
+ a. Call `list_datasets` to find data sources
+ b. Create charts with `generate_chart` (line/bar/table)
+ c. Call `generate_dashboard` with chart IDs""",
+ "developer": """**Workflow for Developers:**
+
+1. Call `get_instance_info` to understand the instance
+2. Call `get_schema(model_type)` to discover columns and filters
+3. Use `execute_sql(database_id, sql)` to run queries
+4. Use `open_sql_lab_with_context` for SQL Lab URLs
+5. Use `list_datasets`/`list_charts`/`list_dashboards` with filters
+6. Use `generate_explore_link` for chart previews without saving""",
+ }
-{intro} {focus}
+ selected_workflow = workflows.get(user_type, workflows["analyst"])
-I'll help you through these steps:
-1. **Explore Available Data** - See what datasets you can work with
-2. **Understand Your Data** - Examine columns, metrics, and sample data
-3. **Create Visualizations** - Build charts that tell a story
-4. **Design a Dashboard** - Combine charts into an interactive dashboard
-5. **Learn Advanced Features** - Discover filters, SQL Lab, and more
+ return f"""**{app_name} Quickstart Guide**
-To get started, I'll use these tools:
-- `get_instance_info` - Overview of your {app_name} instance
-- `list_datasets` - Find available datasets
-- `get_dataset_info` - Explore dataset details
-- `generate_chart` - Create visualizations
-- `generate_dashboard` - Build your dashboard
+{selected_workflow}
-Let me begin by checking what's available in your {app_name} instance. I'll first get
-an overview, then show you the datasets filtered by your interest in {focus_area}.
+**Available Tools Summary:**
+- `get_instance_info` - Instance overview (databases, dataset count, chart count)
+- `list_datasets` / `get_dataset_info` - Find and examine data sources
+- `list_charts` / `get_chart_info` - Browse existing charts
+- `list_dashboards` / `get_dashboard_info` - Browse existing dashboards
+- `generate_explore_link` - Create interactive chart preview (no save)
+- `generate_chart` - Create and save a chart permanently
+- `generate_dashboard` - Create a dashboard from chart IDs
+- `execute_sql` - Run SQL queries against a database
+- `get_schema` - Discover filterable/sortable columns for list tools
-Would you like me to start by showing you what data you can work with?"""
+Start by calling `get_instance_info` to see what data is available."""
diff --git a/superset/mcp_service/system/resources/instance_metadata.py b/superset/mcp_service/system/resources/instance_metadata.py
index 55d4b67f75d..29e223f06e1 100644
--- a/superset/mcp_service/system/resources/instance_metadata.py
+++ b/superset/mcp_service/system/resources/instance_metadata.py
@@ -16,11 +16,17 @@
# under the License.
"""
-System resources for providing instance configuration and stats
+System resources for providing instance configuration and stats.
+
+This resource differs from the get_instance_info tool by also including
+available dataset IDs and database IDs, so LLMs can immediately call
+get_dataset_info or execute_sql without an extra list call.
"""
import logging
+from sqlalchemy.exc import SQLAlchemyError
+
from superset.mcp_service.app import mcp
from superset.mcp_service.auth import mcp_auth_hook
@@ -31,19 +37,15 @@ logger = logging.getLogger(__name__)
@mcp_auth_hook
def get_instance_metadata_resource() -> str:
"""
- Provide comprehensive metadata about the instance.
+ Provide instance metadata with available dataset and database IDs.
This resource gives LLMs context about:
- - Available datasets and their popularity
+ - Instance summary stats (counts of dashboards, charts, datasets)
+ - Available database connections with their IDs (for execute_sql)
+ - Available datasets with IDs and table names (for get_dataset_info)
- Dashboard and chart statistics
- - Database connections
- - Popular queries and usage patterns
- - Available visualization types
- - Feature flags and configuration
"""
try:
- # Import the shared core and DAOs at runtime
- # Create a shared core instance for the resource
from typing import Any, cast, Type
from superset.daos.base import BaseDAO
@@ -62,6 +64,7 @@ def get_instance_metadata_resource() -> str:
calculate_popular_content,
calculate_recent_activity,
)
+ from superset.utils import json
instance_info_core = InstanceInfoCore(
dao_classes={
@@ -88,12 +91,57 @@ def get_instance_metadata_resource() -> str:
logger=logger,
)
- # Use the shared core's resource method
- return instance_info_core.get_resource()
+ # Get base instance info
+ base_result = json.loads(instance_info_core.get_resource())
+
+ # Remove empty popular_content if it has no useful data
+ popular = base_result.get("popular_content", {})
+ if popular and not any(popular.get(k) for k in popular):
+ del base_result["popular_content"]
+
+ # Add available datasets (top 20 by most recent modification)
+ dataset_dao = instance_info_core.dao_classes["datasets"]
+ try:
+ datasets = dataset_dao.find_all()
+ # Convert to string to avoid TypeError when comparing datetime with None
+ sorted_datasets = sorted(
+ datasets,
+ key=lambda d: str(getattr(d, "changed_on", "") or ""),
+ reverse=True,
+ )[:20]
+ base_result["available_datasets"] = [
+ {
+ "id": ds.id,
+ "table_name": ds.table_name,
+ "schema": getattr(ds, "schema", None),
+ "database_id": getattr(ds, "database_id", None),
+ }
+ for ds in sorted_datasets
+ ]
+ except (SQLAlchemyError, AttributeError) as e:
+ logger.warning("Could not fetch datasets for metadata: %s", e)
+ base_result["available_datasets"] = []
+
+ # Add available databases (for execute_sql)
+ database_dao = instance_info_core.dao_classes["databases"]
+ try:
+ databases = database_dao.find_all()
+ base_result["available_databases"] = [
+ {
+ "id": db.id,
+ "database_name": db.database_name,
+ "backend": getattr(db, "backend", None),
+ }
+ for db in databases
+ ]
+ except (SQLAlchemyError, AttributeError) as e:
+ logger.warning("Could not fetch databases for metadata: %s", e)
+ base_result["available_databases"] = []
+
+ return json.dumps(base_result, indent=2)
- except Exception as e:
+ except (SQLAlchemyError, AttributeError, KeyError, ValueError) as e:
logger.error("Error generating instance metadata: %s", e)
- # Return minimal metadata on error
from superset.utils import json
return json.dumps(
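
[Editor's note] One subtle point in the dataset-sorting hunk above: changed_on can be None, and datetime does not compare with None in Python 3, so the sort key coerces to str. ISO-style datetime strings still order correctly, and None becomes "", which sorts last under reverse=True. A self-contained illustration:

```python
from datetime import datetime
from types import SimpleNamespace

datasets = [
    SimpleNamespace(id=1, changed_on=datetime(2026, 1, 15)),
    SimpleNamespace(id=2, changed_on=None),  # never modified
    SimpleNamespace(id=3, changed_on=datetime(2026, 1, 30)),
]

# key=lambda d: d.changed_on would raise TypeError on the None entry.
ordered = sorted(
    datasets,
    key=lambda d: str(getattr(d, "changed_on", "") or ""),
    reverse=True,
)
print([d.id for d in ordered])  # [3, 1, 2] -- None sorts last
```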