Copilot commented on code in PR #39922: URL: https://github.com/apache/superset/pull/39922#discussion_r3202727508
########## superset/mcp_service/chart/registry.py: ########## @@ -0,0 +1,112 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +ChartTypeRegistry — central registry mapping chart_type strings to plugins. + +Replaces the four previously-scattered dispatch locations: + - schema_validator.py: chart_type_validators dict + - dataset_validator.py: isinstance branches in _extract_column_references() + - chart_utils.py: if/elif chain in map_config_to_form_data() + - dataset_validator.py: isinstance branches in normalize_column_names() + +Usage:: + + from superset.mcp_service.chart.registry import get_registry + + plugin = get_registry().get("xy") + if plugin is None: + raise ValueError("Unknown chart type: xy") + form_data = plugin.to_form_data(config, dataset_id) +""" + +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from superset.mcp_service.chart.plugin import ChartTypePlugin + +logger = logging.getLogger(__name__) + +_REGISTRY: dict[str, "ChartTypePlugin"] = {} + + +def register(plugin: "ChartTypePlugin") -> None: + """Register a chart type plugin in the global registry.""" + if plugin.chart_type in _REGISTRY: + logger.warning( + "Overwriting existing plugin for chart_type=%r", plugin.chart_type + ) + _REGISTRY[plugin.chart_type] = plugin + logger.debug("Registered chart plugin: %r", plugin.chart_type) + + +def get(chart_type: str) -> "ChartTypePlugin | None": + """Return the plugin for a given chart_type, or None if not registered.""" + return _REGISTRY.get(chart_type) Review Comment: The global registry remains empty unless `superset.mcp_service.chart.plugins` is imported somewhere first. With the new dispatch (SchemaValidator/DatasetValidator/map_config_to_form_data/etc.), calling these utilities in isolation (e.g. unit tests or other Python entrypoints) will treat all chart_types as unregistered and/or raise `Unsupported config type`. Consider making the registry self-initializing (e.g. lazy-import the built-in plugins the first time `get/get_registry/all_types/is_registered` is called when `_REGISTRY` is empty), or otherwise provide an explicit `init_registry()` that callers must invoke early and enforce it in these call sites. ########## superset/mcp_service/chart/validation/schema_validator.py: ########## @@ -147,19 +147,13 @@ def _pre_validate_chart_type( chart_type: str, config: Dict[str, Any], ) -> Tuple[bool, ChartGenerationError | None]: - """Validate chart type and dispatch to type-specific pre-validation.""" - chart_type_validators = { - "xy": SchemaValidator._pre_validate_xy_config, - "table": SchemaValidator._pre_validate_table_config, - "pie": SchemaValidator._pre_validate_pie_config, - "pivot_table": SchemaValidator._pre_validate_pivot_table_config, - "mixed_timeseries": SchemaValidator._pre_validate_mixed_timeseries_config, - "handlebars": SchemaValidator._pre_validate_handlebars_config, - "big_number": SchemaValidator._pre_validate_big_number_config, - } + """Validate chart type and dispatch to plugin pre-validation.""" + from superset.mcp_service.chart.registry import get_registry - if not isinstance(chart_type, str) or chart_type not in chart_type_validators: - valid_types = ", ".join(chart_type_validators.keys()) + registry = get_registry() + + if not isinstance(chart_type, str) or not registry.is_registered(chart_type): + valid_types = ", ".join(registry.all_types()) return False, ChartGenerationError( Review Comment: `_pre_validate_chart_type` now relies on the plugin registry being pre-populated; if the registry hasn't been initialized yet, every request will be rejected as INVALID_CHART_TYPE and `valid_types` will be empty. To avoid fragile import-order coupling, consider ensuring the registry auto-loads built-in plugins on first access (or explicitly initializing it here before calling `is_registered()`). ########## superset/mcp_service/chart/validation/runtime/__init__.py: ########## @@ -56,20 +53,10 @@ def validate_runtime_issues( warnings: List[str] = [] suggestions: List[str] = [] - # Only check XY charts for format and cardinality issues - if isinstance(config, XYChartConfig): - # Format-type compatibility validation - format_warnings = RuntimeValidator._validate_format_compatibility(config) - if format_warnings: - warnings.extend(format_warnings) - - # Cardinality validation - cardinality_warnings, cardinality_suggestions = ( - RuntimeValidator._validate_cardinality(config, dataset_id) - ) - if cardinality_warnings: - warnings.extend(cardinality_warnings) - suggestions.extend(cardinality_suggestions) + # Per-plugin runtime warnings (format, cardinality, etc.) + plugin_warnings = RuntimeValidator._validate_plugin_runtime(config, dataset_id) + if plugin_warnings: + warnings.extend(plugin_warnings) Review Comment: `validate_runtime_issues` still returns both `warnings` and `suggestions`, but the new plugin-based runtime validation only returns a list of warnings. This drops suggestion strings previously returned by cardinality checks (see `CardinalityValidator.check_cardinality(...)->{'suggestions': ...}`), so callers will no longer receive actionable runtime suggestions for XY charts. Consider extending the plugin API to return both warnings and suggestions (or a structured object), and plumb suggestions through to the `suggestions` metadata returned by `validate_runtime_issues`. ########## superset/mcp_service/chart/validation/schema_validator.py: ########## @@ -178,352 +172,127 @@ def _pre_validate_chart_type( error_code="INVALID_CHART_TYPE", ) - return chart_type_validators[chart_type](config) - - @staticmethod - def _pre_validate_xy_config( - config: Dict[str, Any], - ) -> Tuple[bool, ChartGenerationError | None]: - """Pre-validate XY chart configuration.""" - # x is optional — defaults to dataset's main_dttm_col in map_xy_config - if "y" not in config: - return False, ChartGenerationError( - error_type="missing_xy_fields", - message="XY chart missing required field: 'y' (Y-axis metrics)", - details="XY charts require Y-axis (metrics) specifications. " - "X-axis is optional and defaults to the dataset's primary " - "datetime column when omitted.", - suggestions=[ - "Add 'y' field: [{'name': 'metric_column', 'aggregate': 'SUM'}] " - "for Y-axis", - "Example: {'chart_type': 'xy', 'x': {'name': 'date'}, " - "'y': [{'name': 'sales', 'aggregate': 'SUM'}]}", - ], - error_code="MISSING_XY_FIELDS", - ) - - # Validate Y is a list - if not isinstance(config.get("y", []), list): + plugin = registry.get(chart_type) + if plugin is None: return False, ChartGenerationError( - error_type="invalid_y_format", - message="Y-axis must be a list of metrics", - details="The 'y' field must be an array of metric specifications", - suggestions=[ - "Wrap Y-axis metric in array: 'y': [{'name': 'column', " - "'aggregate': 'SUM'}]", - "Multiple metrics supported: 'y': [metric1, metric2, ...]", - ], - error_code="INVALID_Y_FORMAT", - ) - - return True, None - - @staticmethod - def _pre_validate_table_config( - config: Dict[str, Any], - ) -> Tuple[bool, ChartGenerationError | None]: - """Pre-validate table chart configuration.""" - if "columns" not in config: - return False, ChartGenerationError( - error_type="missing_columns", - message="Table chart missing required field: columns", - details="Table charts require a 'columns' array to specify which " - "columns to display", - suggestions=[ - "Add 'columns' field with array of column specifications", - "Example: 'columns': [{'name': 'product'}, {'name': 'sales', " - "'aggregate': 'SUM'}]", - "Each column can have optional 'aggregate' for metrics", - ], - error_code="MISSING_COLUMNS", - ) - - if not isinstance(config.get("columns", []), list): - return False, ChartGenerationError( - error_type="invalid_columns_format", - message="Columns must be a list", - details="The 'columns' field must be an array of column specifications", - suggestions=[ - "Ensure columns is an array: 'columns': [...]", - "Each column should be an object with 'name' field", - ], - error_code="INVALID_COLUMNS_FORMAT", - ) - - return True, None - - @staticmethod - def _pre_validate_pie_config( - config: Dict[str, Any], - ) -> Tuple[bool, ChartGenerationError | None]: - """Pre-validate pie chart configuration.""" - missing_fields = [] - - if "dimension" not in config: - missing_fields.append("'dimension' (category column for slices)") - if "metric" not in config: - missing_fields.append("'metric' (value metric for slice sizes)") - - if missing_fields: - return False, ChartGenerationError( - error_type="missing_pie_fields", - message=f"Pie chart missing required " - f"fields: {', '.join(missing_fields)}", - details="Pie charts require a dimension (categories) and a metric " - "(values)", - suggestions=[ - "Add 'dimension' field: {'name': 'category_column'}", - "Add 'metric' field: {'name': 'value_column', 'aggregate': 'SUM'}", - "Example: {'chart_type': 'pie', 'dimension': {'name': " - "'product'}, 'metric': {'name': 'revenue', 'aggregate': 'SUM'}}", - ], - error_code="MISSING_PIE_FIELDS", - ) - - return True, None - - @staticmethod - def _pre_validate_handlebars_config( - config: Dict[str, Any], - ) -> Tuple[bool, ChartGenerationError | None]: - """Pre-validate handlebars chart configuration.""" - if "handlebars_template" not in config: - return False, ChartGenerationError( - error_type="missing_handlebars_template", - message="Handlebars chart missing required field: handlebars_template", - details="Handlebars charts require a 'handlebars_template' string " - "containing Handlebars HTML template markup", - suggestions=[ - "Add 'handlebars_template' with a Handlebars HTML template", - "Data is available as {{data}} array in the template", - "Example: '<ul>{{#each data}}<li>{{this.name}}: " - "{{this.value}}</li>{{/each}}</ul>'", - ], - error_code="MISSING_HANDLEBARS_TEMPLATE", - ) - - template = config.get("handlebars_template") - if not isinstance(template, str) or not template.strip(): - return False, ChartGenerationError( - error_type="invalid_handlebars_template", - message="Handlebars template must be a non-empty string", - details="The 'handlebars_template' field must be a non-empty string " - "containing valid Handlebars HTML template markup", - suggestions=[ - "Ensure handlebars_template is a non-empty string", - "Example: '<ul>{{#each data}}<li>{{this.name}}</li>{{/each}}</ul>'", - ], - error_code="INVALID_HANDLEBARS_TEMPLATE", - ) - - query_mode = config.get("query_mode", "aggregate") - if query_mode not in ("aggregate", "raw"): - return False, ChartGenerationError( - error_type="invalid_query_mode", - message="Invalid query_mode for handlebars chart", - details="query_mode must be either 'aggregate' or 'raw'", - suggestions=[ - "Use 'aggregate' for aggregated data (default)", - "Use 'raw' for individual rows", - ], - error_code="INVALID_QUERY_MODE", - ) - - if query_mode == "raw" and not config.get("columns"): - return False, ChartGenerationError( - error_type="missing_raw_columns", - message="Handlebars chart in 'raw' mode requires 'columns'", - details="When query_mode is 'raw', you must specify which columns " - "to include in the query results", - suggestions=[ - "Add 'columns': [{'name': 'column_name'}] for raw mode", - "Or use query_mode='aggregate' with 'metrics' " - "and optional 'groupby'", - ], - error_code="MISSING_RAW_COLUMNS", - ) - - if query_mode == "aggregate" and not config.get("metrics"): - return False, ChartGenerationError( - error_type="missing_aggregate_metrics", - message="Handlebars chart in 'aggregate' mode requires 'metrics'", - details="When query_mode is 'aggregate' (default), you must specify " - "at least one metric with an aggregate function", - suggestions=[ - "Add 'metrics': [{'name': 'column', 'aggregate': 'SUM'}]", - "Or use query_mode='raw' with 'columns' for individual rows", - ], - error_code="MISSING_AGGREGATE_METRICS", - ) - - return True, None - - @staticmethod - def _pre_validate_big_number_config( - config: Dict[str, Any], - ) -> Tuple[bool, ChartGenerationError | None]: - """Pre-validate big number chart configuration.""" - if "metric" not in config: - return False, ChartGenerationError( - error_type="missing_metric", - message="Big Number chart missing required field: metric", - details="Big Number charts require a 'metric' field " - "specifying the value to display", - suggestions=[ - "Add 'metric' with name and aggregate: " - "{'name': 'revenue', 'aggregate': 'SUM'}", - "The aggregate function is required (SUM, COUNT, AVG, MIN, MAX)", - "Example: {'chart_type': 'big_number', " - "'metric': {'name': 'sales', 'aggregate': 'SUM'}}", - ], - error_code="MISSING_BIG_NUMBER_METRIC", - ) - - metric = config.get("metric", {}) - if not isinstance(metric, dict): - return False, ChartGenerationError( - error_type="invalid_metric_type", - message="Big Number metric must be a dict with 'name' and 'aggregate'", - details="The 'metric' field must be an object, " - f"got {type(metric).__name__}", - suggestions=[ - "Use a dict: {'name': 'col', 'aggregate': 'SUM'}", - "Valid aggregates: SUM, COUNT, AVG, MIN, MAX", - ], - error_code="INVALID_BIG_NUMBER_METRIC_TYPE", - ) - if not metric.get("aggregate") and not metric.get("saved_metric"): - return False, ChartGenerationError( - error_type="missing_metric_aggregate", - message="Big Number metric must include an aggregate function " - "or reference a saved metric", - details="The metric must have an 'aggregate' field " - "or 'saved_metric': true", - suggestions=[ - "Add 'aggregate' to your metric: " - "{'name': 'col', 'aggregate': 'SUM'}", - "Or use a saved metric: " - "{'name': 'total_sales', 'saved_metric': true}", - "Valid aggregates: SUM, COUNT, AVG, MIN, MAX", - ], - error_code="MISSING_BIG_NUMBER_AGGREGATE", - ) - - show_trendline = config.get("show_trendline", False) - temporal_column = config.get("temporal_column") - if show_trendline and not temporal_column: - return False, ChartGenerationError( - error_type="missing_temporal_column", - message="Trendline requires a temporal column", - details="When 'show_trendline' is True, a " - "'temporal_column' must be specified", - suggestions=[ - "Add 'temporal_column': 'date_column_name'", - "Or set 'show_trendline': false for number only", - "Use get_dataset_info to find temporal columns", - ], - error_code="MISSING_TEMPORAL_COLUMN", - ) - - return True, None - - @staticmethod - def _pre_validate_pivot_table_config( - config: Dict[str, Any], - ) -> Tuple[bool, ChartGenerationError | None]: - """Pre-validate pivot table configuration.""" - missing_fields = [] - - if "rows" not in config: - missing_fields.append("'rows' (row grouping columns)") - if "metrics" not in config: - missing_fields.append("'metrics' (aggregation metrics)") - - if missing_fields: - return False, ChartGenerationError( - error_type="missing_pivot_fields", - message=f"Pivot table missing required " - f"fields: {', '.join(missing_fields)}", - details="Pivot tables require row groupings and metrics", - suggestions=[ - "Add 'rows' field: [{'name': 'category'}]", - "Add 'metrics' field: [{'name': 'sales', 'aggregate': 'SUM'}]", - "Optional 'columns' for cross-tabulation: [{'name': 'region'}]", - ], - error_code="MISSING_PIVOT_FIELDS", - ) - - if not isinstance(config.get("rows", []), list): - return False, ChartGenerationError( - error_type="invalid_rows_format", - message="Rows must be a list of columns", - details="The 'rows' field must be an array of column specifications", - suggestions=[ - "Wrap row columns in array: 'rows': [{'name': 'category'}]", - ], - error_code="INVALID_ROWS_FORMAT", - ) - - if not isinstance(config.get("metrics", []), list): - return False, ChartGenerationError( - error_type="invalid_metrics_format", - message="Metrics must be a list", - details="The 'metrics' field must be an array of metric specifications", - suggestions=[ - "Wrap metrics in array: 'metrics': [{'name': 'sales', " - "'aggregate': 'SUM'}]", - ], - error_code="INVALID_METRICS_FORMAT", + error_type="invalid_chart_type", + message=f"Chart type '{chart_type}' has no registered plugin", + details="Internal error: chart type is listed but has no plugin", + suggestions=["Use a supported chart_type"], + error_code="INVALID_CHART_TYPE", ) + if (error := plugin.pre_validate(config)) is not None: + return False, error return True, None Review Comment: This refactor removes the per-chart-type `SchemaValidator._pre_validate_*_config` helpers entirely. The current unit tests in this repo call these methods directly (e.g. `SchemaValidator._pre_validate_big_number_config(...)`), so CI will fail unless tests are updated or you keep backwards-compatible wrappers that delegate to the relevant plugin’s `pre_validate()` implementation. ########## superset/mcp_service/chart/validation/runtime/__init__.py: ########## @@ -98,61 +85,28 @@ def validate_runtime_issues( return True, None @staticmethod - def _validate_format_compatibility(config: XYChartConfig) -> List[str]: - """Validate format-type compatibility.""" - warnings: List[str] = [] - - try: - # Import here to avoid circular imports - from .format_validator import FormatTypeValidator - - is_valid, format_warnings = ( - FormatTypeValidator.validate_format_compatibility(config) - ) - if format_warnings: - warnings.extend(format_warnings) - except ImportError: - logger.warning("Format validator not available") - except Exception as e: - logger.warning("Format validation failed: %s", e) - - return warnings - - @staticmethod - def _validate_cardinality( - config: XYChartConfig, dataset_id: int | str - ) -> Tuple[List[str], List[str]]: - """Validate cardinality issues.""" - warnings: List[str] = [] - suggestions: List[str] = [] + def _validate_plugin_runtime( + config: ChartConfig, dataset_id: int | str + ) -> List[str]: + """Delegate per-chart-type runtime warnings to the plugin registry. + Each plugin's get_runtime_warnings() method returns chart-type-specific + warnings (e.g. format/cardinality for XY). The registry dispatch removes + the previous isinstance(config, XYChartConfig) hardcoding. + """ try: - # Import here to avoid circular imports - from .cardinality_validator import CardinalityValidator - - # Determine chart type for cardinality thresholds - chart_type = config.kind if hasattr(config, "kind") else "default" - - # Check X-axis cardinality - if config.x is None: - return warnings, suggestions - is_ok, cardinality_info = CardinalityValidator.check_cardinality( - dataset_id=dataset_id, - x_column=config.x.name, - chart_type=chart_type, - group_by_column=config.group_by[0].name if config.group_by else None, - ) - - if not is_ok and cardinality_info: - warnings.extend(cardinality_info.get("warnings", [])) - suggestions.extend(cardinality_info.get("suggestions", [])) - - except ImportError: - logger.warning("Cardinality validator not available") - except Exception as e: - logger.warning("Cardinality validation failed: %s", e) - - return warnings, suggestions + from superset.mcp_service.chart.registry import get_registry + + chart_type = getattr(config, "chart_type", None) + if chart_type is None: + return [] + plugin = get_registry().get(chart_type) + if plugin is None: + return [] + return plugin.get_runtime_warnings(config, dataset_id) + except Exception as exc: + logger.warning("Plugin runtime validation failed: %s", exc) + return [] Review Comment: This module removed the `_validate_format_compatibility` and `_validate_cardinality` helpers and now dispatches through `_validate_plugin_runtime()`. The existing unit tests in this repo patch those removed methods (e.g. `RuntimeValidator._validate_cardinality`), so CI will fail unless tests are updated or compatibility shims are kept (e.g. keep the methods as thin wrappers calling the XY plugin’s runtime checks). ########## superset/mcp_service/chart/validation/schema_validator.py: ########## @@ -178,352 +172,127 @@ def _pre_validate_chart_type( error_code="INVALID_CHART_TYPE", ) - return chart_type_validators[chart_type](config) - - @staticmethod - def _pre_validate_xy_config( - config: Dict[str, Any], - ) -> Tuple[bool, ChartGenerationError | None]: - """Pre-validate XY chart configuration.""" - # x is optional — defaults to dataset's main_dttm_col in map_xy_config - if "y" not in config: - return False, ChartGenerationError( - error_type="missing_xy_fields", - message="XY chart missing required field: 'y' (Y-axis metrics)", - details="XY charts require Y-axis (metrics) specifications. " - "X-axis is optional and defaults to the dataset's primary " - "datetime column when omitted.", - suggestions=[ - "Add 'y' field: [{'name': 'metric_column', 'aggregate': 'SUM'}] " - "for Y-axis", - "Example: {'chart_type': 'xy', 'x': {'name': 'date'}, " - "'y': [{'name': 'sales', 'aggregate': 'SUM'}]}", - ], - error_code="MISSING_XY_FIELDS", - ) - - # Validate Y is a list - if not isinstance(config.get("y", []), list): + plugin = registry.get(chart_type) + if plugin is None: return False, ChartGenerationError( - error_type="invalid_y_format", - message="Y-axis must be a list of metrics", - details="The 'y' field must be an array of metric specifications", - suggestions=[ - "Wrap Y-axis metric in array: 'y': [{'name': 'column', " - "'aggregate': 'SUM'}]", - "Multiple metrics supported: 'y': [metric1, metric2, ...]", - ], - error_code="INVALID_Y_FORMAT", - ) - - return True, None - - @staticmethod - def _pre_validate_table_config( - config: Dict[str, Any], - ) -> Tuple[bool, ChartGenerationError | None]: - """Pre-validate table chart configuration.""" - if "columns" not in config: - return False, ChartGenerationError( - error_type="missing_columns", - message="Table chart missing required field: columns", - details="Table charts require a 'columns' array to specify which " - "columns to display", - suggestions=[ - "Add 'columns' field with array of column specifications", - "Example: 'columns': [{'name': 'product'}, {'name': 'sales', " - "'aggregate': 'SUM'}]", - "Each column can have optional 'aggregate' for metrics", - ], - error_code="MISSING_COLUMNS", - ) - - if not isinstance(config.get("columns", []), list): - return False, ChartGenerationError( - error_type="invalid_columns_format", - message="Columns must be a list", - details="The 'columns' field must be an array of column specifications", - suggestions=[ - "Ensure columns is an array: 'columns': [...]", - "Each column should be an object with 'name' field", - ], - error_code="INVALID_COLUMNS_FORMAT", - ) - - return True, None - - @staticmethod - def _pre_validate_pie_config( - config: Dict[str, Any], - ) -> Tuple[bool, ChartGenerationError | None]: - """Pre-validate pie chart configuration.""" - missing_fields = [] - - if "dimension" not in config: - missing_fields.append("'dimension' (category column for slices)") - if "metric" not in config: - missing_fields.append("'metric' (value metric for slice sizes)") - - if missing_fields: - return False, ChartGenerationError( - error_type="missing_pie_fields", - message=f"Pie chart missing required " - f"fields: {', '.join(missing_fields)}", - details="Pie charts require a dimension (categories) and a metric " - "(values)", - suggestions=[ - "Add 'dimension' field: {'name': 'category_column'}", - "Add 'metric' field: {'name': 'value_column', 'aggregate': 'SUM'}", - "Example: {'chart_type': 'pie', 'dimension': {'name': " - "'product'}, 'metric': {'name': 'revenue', 'aggregate': 'SUM'}}", - ], - error_code="MISSING_PIE_FIELDS", - ) - - return True, None - - @staticmethod - def _pre_validate_handlebars_config( - config: Dict[str, Any], - ) -> Tuple[bool, ChartGenerationError | None]: - """Pre-validate handlebars chart configuration.""" - if "handlebars_template" not in config: - return False, ChartGenerationError( - error_type="missing_handlebars_template", - message="Handlebars chart missing required field: handlebars_template", - details="Handlebars charts require a 'handlebars_template' string " - "containing Handlebars HTML template markup", - suggestions=[ - "Add 'handlebars_template' with a Handlebars HTML template", - "Data is available as {{data}} array in the template", - "Example: '<ul>{{#each data}}<li>{{this.name}}: " - "{{this.value}}</li>{{/each}}</ul>'", - ], - error_code="MISSING_HANDLEBARS_TEMPLATE", - ) - - template = config.get("handlebars_template") - if not isinstance(template, str) or not template.strip(): - return False, ChartGenerationError( - error_type="invalid_handlebars_template", - message="Handlebars template must be a non-empty string", - details="The 'handlebars_template' field must be a non-empty string " - "containing valid Handlebars HTML template markup", - suggestions=[ - "Ensure handlebars_template is a non-empty string", - "Example: '<ul>{{#each data}}<li>{{this.name}}</li>{{/each}}</ul>'", - ], - error_code="INVALID_HANDLEBARS_TEMPLATE", - ) - - query_mode = config.get("query_mode", "aggregate") - if query_mode not in ("aggregate", "raw"): - return False, ChartGenerationError( - error_type="invalid_query_mode", - message="Invalid query_mode for handlebars chart", - details="query_mode must be either 'aggregate' or 'raw'", - suggestions=[ - "Use 'aggregate' for aggregated data (default)", - "Use 'raw' for individual rows", - ], - error_code="INVALID_QUERY_MODE", - ) - - if query_mode == "raw" and not config.get("columns"): - return False, ChartGenerationError( - error_type="missing_raw_columns", - message="Handlebars chart in 'raw' mode requires 'columns'", - details="When query_mode is 'raw', you must specify which columns " - "to include in the query results", - suggestions=[ - "Add 'columns': [{'name': 'column_name'}] for raw mode", - "Or use query_mode='aggregate' with 'metrics' " - "and optional 'groupby'", - ], - error_code="MISSING_RAW_COLUMNS", - ) - - if query_mode == "aggregate" and not config.get("metrics"): - return False, ChartGenerationError( - error_type="missing_aggregate_metrics", - message="Handlebars chart in 'aggregate' mode requires 'metrics'", - details="When query_mode is 'aggregate' (default), you must specify " - "at least one metric with an aggregate function", - suggestions=[ - "Add 'metrics': [{'name': 'column', 'aggregate': 'SUM'}]", - "Or use query_mode='raw' with 'columns' for individual rows", - ], - error_code="MISSING_AGGREGATE_METRICS", - ) - - return True, None - - @staticmethod - def _pre_validate_big_number_config( - config: Dict[str, Any], - ) -> Tuple[bool, ChartGenerationError | None]: - """Pre-validate big number chart configuration.""" - if "metric" not in config: - return False, ChartGenerationError( - error_type="missing_metric", - message="Big Number chart missing required field: metric", - details="Big Number charts require a 'metric' field " - "specifying the value to display", - suggestions=[ - "Add 'metric' with name and aggregate: " - "{'name': 'revenue', 'aggregate': 'SUM'}", - "The aggregate function is required (SUM, COUNT, AVG, MIN, MAX)", - "Example: {'chart_type': 'big_number', " - "'metric': {'name': 'sales', 'aggregate': 'SUM'}}", - ], - error_code="MISSING_BIG_NUMBER_METRIC", - ) - - metric = config.get("metric", {}) - if not isinstance(metric, dict): - return False, ChartGenerationError( - error_type="invalid_metric_type", - message="Big Number metric must be a dict with 'name' and 'aggregate'", - details="The 'metric' field must be an object, " - f"got {type(metric).__name__}", - suggestions=[ - "Use a dict: {'name': 'col', 'aggregate': 'SUM'}", - "Valid aggregates: SUM, COUNT, AVG, MIN, MAX", - ], - error_code="INVALID_BIG_NUMBER_METRIC_TYPE", - ) - if not metric.get("aggregate") and not metric.get("saved_metric"): - return False, ChartGenerationError( - error_type="missing_metric_aggregate", - message="Big Number metric must include an aggregate function " - "or reference a saved metric", - details="The metric must have an 'aggregate' field " - "or 'saved_metric': true", - suggestions=[ - "Add 'aggregate' to your metric: " - "{'name': 'col', 'aggregate': 'SUM'}", - "Or use a saved metric: " - "{'name': 'total_sales', 'saved_metric': true}", - "Valid aggregates: SUM, COUNT, AVG, MIN, MAX", - ], - error_code="MISSING_BIG_NUMBER_AGGREGATE", - ) - - show_trendline = config.get("show_trendline", False) - temporal_column = config.get("temporal_column") - if show_trendline and not temporal_column: - return False, ChartGenerationError( - error_type="missing_temporal_column", - message="Trendline requires a temporal column", - details="When 'show_trendline' is True, a " - "'temporal_column' must be specified", - suggestions=[ - "Add 'temporal_column': 'date_column_name'", - "Or set 'show_trendline': false for number only", - "Use get_dataset_info to find temporal columns", - ], - error_code="MISSING_TEMPORAL_COLUMN", - ) - - return True, None - - @staticmethod - def _pre_validate_pivot_table_config( - config: Dict[str, Any], - ) -> Tuple[bool, ChartGenerationError | None]: - """Pre-validate pivot table configuration.""" - missing_fields = [] - - if "rows" not in config: - missing_fields.append("'rows' (row grouping columns)") - if "metrics" not in config: - missing_fields.append("'metrics' (aggregation metrics)") - - if missing_fields: - return False, ChartGenerationError( - error_type="missing_pivot_fields", - message=f"Pivot table missing required " - f"fields: {', '.join(missing_fields)}", - details="Pivot tables require row groupings and metrics", - suggestions=[ - "Add 'rows' field: [{'name': 'category'}]", - "Add 'metrics' field: [{'name': 'sales', 'aggregate': 'SUM'}]", - "Optional 'columns' for cross-tabulation: [{'name': 'region'}]", - ], - error_code="MISSING_PIVOT_FIELDS", - ) - - if not isinstance(config.get("rows", []), list): - return False, ChartGenerationError( - error_type="invalid_rows_format", - message="Rows must be a list of columns", - details="The 'rows' field must be an array of column specifications", - suggestions=[ - "Wrap row columns in array: 'rows': [{'name': 'category'}]", - ], - error_code="INVALID_ROWS_FORMAT", - ) - - if not isinstance(config.get("metrics", []), list): - return False, ChartGenerationError( - error_type="invalid_metrics_format", - message="Metrics must be a list", - details="The 'metrics' field must be an array of metric specifications", - suggestions=[ - "Wrap metrics in array: 'metrics': [{'name': 'sales', " - "'aggregate': 'SUM'}]", - ], - error_code="INVALID_METRICS_FORMAT", + error_type="invalid_chart_type", + message=f"Chart type '{chart_type}' has no registered plugin", + details="Internal error: chart type is listed but has no plugin", + suggestions=["Use a supported chart_type"], + error_code="INVALID_CHART_TYPE", ) + if (error := plugin.pre_validate(config)) is not None: + return False, error return True, None - @staticmethod - def _pre_validate_mixed_timeseries_config( - config: Dict[str, Any], - ) -> Tuple[bool, ChartGenerationError | None]: - """Pre-validate mixed timeseries configuration.""" - missing_fields = [] - - if "x" not in config: - missing_fields.append("'x' (X-axis temporal column)") - if "y" not in config: - missing_fields.append("'y' (primary Y-axis metrics)") - if "y_secondary" not in config: - missing_fields.append("'y_secondary' (secondary Y-axis metrics)") - - if missing_fields: - return False, ChartGenerationError( - error_type="missing_mixed_timeseries_fields", - message=f"Mixed timeseries chart missing required " - f"fields: {', '.join(missing_fields)}", - details="Mixed timeseries charts require an x-axis, primary metrics, " - "and secondary metrics", - suggestions=[ - "Add 'x' field: {'name': 'date_column'}", - "Add 'y' field: [{'name': 'revenue', 'aggregate': 'SUM'}]", - "Add 'y_secondary' field: [{'name': 'orders', " - "'aggregate': 'COUNT'}]", - "Optional: 'primary_kind' and 'secondary_kind' for chart types", - ], - error_code="MISSING_MIXED_TIMESERIES_FIELDS", - ) - - for field_name in ["y", "y_secondary"]: - if not isinstance(config.get(field_name, []), list): - return False, ChartGenerationError( - error_type=f"invalid_{field_name}_format", - message=f"'{field_name}' must be a list of metrics", - details=f"The '{field_name}' field must be an array of metric " - "specifications", - suggestions=[ - f"Wrap in array: '{field_name}': " - "[{'name': 'col', 'aggregate': 'SUM'}]", - ], - error_code=f"INVALID_{field_name.upper()}_FORMAT", - ) - - return True, None + # Per-chart-type error details used by _enhance_validation_error. + # Keyed by chart_type discriminator value. + # NOTE: Keep this dict in sync with the plugin registry in + # superset/mcp_service/chart/plugins/ — each registered chart_type must + # have a corresponding entry here so Pydantic parse errors produce + # helpful, type-specific messages. + _CHART_TYPE_ERROR_HINTS: Dict[str, Dict[str, Any]] = { + "xy": { + "error_type": "xy_validation_error", + "message": "XY chart configuration validation failed", + "details": "The XY chart configuration is missing required " + "fields or has invalid structure", + "suggestions": [ + "Ensure 'x' field exists with {'name': 'column_name'}", Review Comment: The XY error-hint suggestions imply that `x` is required ("Ensure 'x' field exists"), but `XYChartConfig.x` is optional and defaults to the dataset’s primary datetime column. This makes the enhanced Pydantic error guidance misleading; consider rewording the `x` suggestion to reflect that it’s optional (and focus on `y` being required). ########## superset/mcp_service/chart/tool/generate_chart.py: ########## @@ -175,7 +175,8 @@ async def generate_chart( # noqa: C901 - Set save_chart=True to permanently save the chart - LLM clients MUST display returned chart URL to users - Use numeric dataset ID or UUID (NOT schema.table_name format) - - MUST include chart_type in config (either 'xy' or 'table') + - MUST include chart_type in config (one of: 'xy', 'table', 'pie', + 'pivot_table', 'mixed_timeseries', 'handlebars', 'big_number') Review Comment: This docstring update expands the supported `chart_type` list, but the XY section a few lines below still says “Required fields: x, y”. In the actual schema, `XYChartConfig.x` is optional (defaults to the dataset’s main datetime column), so the tool documentation is currently inconsistent/misleading for LLM clients. Consider updating the XY bullet to reflect that only `y` is required and `x` is optional. ########## superset/mcp_service/chart/plugins/xy.py: ########## @@ -0,0 +1,185 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""XY chart type plugin (line, bar, area, scatter).""" + +from __future__ import annotations + +import logging +from typing import Any + +from superset.mcp_service.chart.plugin import BaseChartPlugin +from superset.mcp_service.chart.schemas import ColumnRef +from superset.mcp_service.common.error_schemas import ChartGenerationError + +logger = logging.getLogger(__name__) + + +class XYChartPlugin(BaseChartPlugin): + """Plugin for xy chart type (line, bar, area, scatter).""" + + chart_type = "xy" + display_name = "Line / Bar / Area / Scatter Chart" + native_viz_types = { + "echarts_timeseries_line": "Line Chart", + "echarts_timeseries_bar": "Bar Chart", + "echarts_area": "Area Chart", + "echarts_timeseries_scatter": "Scatter Plot", + } + + def pre_validate( + self, + config: dict[str, Any], + ) -> ChartGenerationError | None: + # x is optional — defaults to dataset's main_dttm_col in map_xy_config + if "y" not in config: + return ChartGenerationError( + error_type="missing_xy_fields", + message="XY chart missing required field: 'y' (Y-axis metrics)", + details=( + "XY charts require Y-axis (metrics) specifications. " + "X-axis is optional and defaults to the dataset's primary " + "datetime column when omitted." + ), + suggestions=[ + "Add 'y' field: [{'name': 'metric_column', 'aggregate': 'SUM'}]", + "Example: {'chart_type': 'xy', 'x': {'name': 'date'}, " + "'y': [{'name': 'sales', 'aggregate': 'SUM'}]}", + ], + error_code="MISSING_XY_FIELDS", + ) + + if not isinstance(config.get("y", []), list): + return ChartGenerationError( + error_type="invalid_y_format", + message="Y-axis must be a list of metrics", + details="The 'y' field must be an array of metric specifications", + suggestions=[ + "Wrap Y-axis metric in array: 'y': [{'name': 'column', " + "'aggregate': 'SUM'}]", + "Multiple metrics supported: 'y': [metric1, metric2, ...]", + ], + error_code="INVALID_Y_FORMAT", + ) + + return None + + def extract_column_refs(self, config: Any) -> list[ColumnRef]: + from superset.mcp_service.chart.schemas import XYChartConfig + + if not isinstance(config, XYChartConfig): + return [] + refs: list[ColumnRef] = [] + if config.x is not None: + refs.append(config.x) + refs.extend(config.y) + if config.group_by: + refs.extend(config.group_by) + if config.filters: + for f in config.filters: + refs.append(ColumnRef(name=f.column)) + return refs + + def to_form_data( + self, config: Any, dataset_id: int | str | None = None + ) -> dict[str, Any]: + from superset.mcp_service.chart.chart_utils import map_xy_config + + return map_xy_config(config, dataset_id=dataset_id) + + def normalize_column_refs(self, config: Any, dataset_context: Any) -> Any: + from superset.mcp_service.chart.schemas import XYChartConfig + from superset.mcp_service.chart.validation.dataset_validator import ( + DatasetValidator, + ) + + config_dict = config.model_dump() + get_canonical = DatasetValidator._get_canonical_column_name + + if config_dict.get("x"): + config_dict["x"]["name"] = get_canonical( + config_dict["x"]["name"], dataset_context + ) + for y_col in config_dict.get("y") or []: + y_col["name"] = get_canonical(y_col["name"], dataset_context) + for gb_col in config_dict.get("group_by") or []: + gb_col["name"] = get_canonical(gb_col["name"], dataset_context) + + DatasetValidator._normalize_filters(config_dict, dataset_context) + return XYChartConfig.model_validate(config_dict) + + def generate_name(self, config: Any, dataset_name: str | None = None) -> str: + from superset.mcp_service.chart.chart_utils import ( + _xy_chart_context, + _xy_chart_what, + ) + + what = _xy_chart_what(config) + context = _xy_chart_context(config) + return f"{what} \u2013 {context}" if context else what + + def resolve_viz_type(self, config: Any) -> str: + kind = getattr(config, "kind", "line") + return { + "line": "echarts_timeseries_line", + "bar": "echarts_timeseries_bar", + "area": "echarts_area", + "scatter": "echarts_timeseries_scatter", + }.get(kind, "echarts_timeseries_line") + + def get_runtime_warnings(self, config: Any, dataset_id: int | str) -> list[str]: + """Return format-compatibility and cardinality warnings for XY charts.""" + from superset.mcp_service.chart.schemas import XYChartConfig + + if not isinstance(config, XYChartConfig): + return [] + + warnings: list[str] = [] + + try: + from superset.mcp_service.chart.validation.runtime.format_validator import ( + FormatTypeValidator, + ) + + _valid, format_warnings = FormatTypeValidator.validate_format_compatibility( + config + ) + if format_warnings: + warnings.extend(format_warnings) + except Exception as exc: + logger.warning("XY format validation failed: %s", exc) + + try: + from superset.mcp_service.chart.validation.runtime.cardinality_validator import ( # noqa: E501 + CardinalityValidator, + ) + + chart_kind = config.kind if hasattr(config, "kind") else "default" + group_by_col = config.group_by[0].name if config.group_by else None + if config.x is not None: + _ok, card_info = CardinalityValidator.check_cardinality( + dataset_id=dataset_id, + x_column=config.x.name, + chart_type=chart_kind, + group_by_column=group_by_col, + ) + if not _ok and card_info: + warnings.extend(card_info.get("warnings", [])) Review Comment: `CardinalityValidator.check_cardinality()` returns both warnings and suggestions in `card_info`, but this plugin currently only propagates `card_info['warnings']`. This means callers lose the actionable suggestion strings for high-cardinality detections. If the runtime-validator contract includes suggestions, consider returning/propagating `card_info['suggestions']` as well (which likely requires updating the plugin/runtime validator interface to carry suggestions). ########## superset/mcp_service/app.py: ########## @@ -222,10 +222,14 @@ def get_default_instructions(branding: str = "Apache Superset") -> str: - PT1H (hourly), P1D (daily), P1W (weekly), P1M (monthly), P1Y (yearly) Chart Types in Existing Charts (viewable via list_charts/get_chart_info): -- pie, big_number, big_number_total, funnel, gauge_chart -- echarts_timeseries_line, echarts_timeseries_bar, echarts_timeseries_area -- pivot_table_v2, heatmap_v2, sankey_v2, sunburst_v2, treemap_v2 -- word_cloud, world_map, box_plot, bubble, mixed_timeseries +Each chart returned by list_charts / get_chart_info includes a +chart_type_display_name field with a human-readable name. Use that name +when referring to chart types — do NOT expose raw viz_type identifiers. +Common display names: Line Chart, Bar Chart, Area Chart, Scatter Plot, +Pie Chart, Table, Interactive Table, Pivot Table, Big Number, +Big Number with Trendline, Mixed Timeseries Chart, Custom Template Chart, +Funnel Chart, Gauge Chart, Heatmap, Sankey Chart, Sunburst, Treemap, +Word Cloud, World Map, Box Plot, Bubble Chart. Review Comment: The default instructions claim every chart returned by list_charts/get_chart_info includes a human-readable `chart_type_display_name` and list many “common display names” (Funnel/Gauge/Heatmap/Sankey/etc.). However, `chart_type_display_name` is currently resolved via `display_name_for_viz_type()` which only searches the MCP chart plugins’ `native_viz_types` mappings (currently covering the 7 supported generate_chart types). For most existing viz_types, this field will be null/None, making these instructions inaccurate. Either expand the backend mapping to cover common viz_types beyond the 7 plugins, or clarify here that `chart_type_display_name` may be missing and callers must not fall back to exposing raw `viz_type`. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
