bito-code-review[bot] commented on code in PR #39922: URL: https://github.com/apache/superset/pull/39922#discussion_r3286969549
########## superset/mcp_service/chart/plugin.py: ########## @@ -0,0 +1,255 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +ChartTypePlugin protocol and BaseChartPlugin base class. + +Each chart type owns its pre-validation, column extraction, form_data mapping, +and post-map validation in a single plugin class. This eliminates the previous +pattern of 4 separate dispatch points (schema_validator.py, dataset_validator.py, +chart_utils.py, pipeline.py) that had to be updated in sync whenever a new chart +type was added. +""" + +from __future__ import annotations + +from typing import Any, Protocol, runtime_checkable + +from superset.mcp_service.chart.schemas import ColumnRef +from superset.mcp_service.common.error_schemas import ChartGenerationError + + +@runtime_checkable +class ChartTypePlugin(Protocol): + """ + Protocol that every chart-type plugin must satisfy. + + Implementing all eight methods in a single class guarantees that adding a + new chart type requires only one new file — the plugin — rather than edits + across multiple separate files. + """ + + #: Discriminator value matching ChartConfig's chart_type field. + chart_type: str + + #: Human-readable name shown to users (e.g. "Line / Bar / Area / Scatter"). 
+ display_name: str + + #: Maps every Superset-internal viz_type this plugin can produce to a + #: user-facing display name, e.g. {"echarts_timeseries_line": "Line Chart"}. + #: Used by the registry to resolve display names for existing charts without + #: needing a separate JSON mapping file. + native_viz_types: dict[str, str] + + def pre_validate( + self, + config: dict[str, Any], + ) -> ChartGenerationError | None: + """ + Early validation of the raw config dict before Pydantic parsing. + + Called by SchemaValidator before attempting to parse the request. + Should check that required top-level keys are present and well-typed. + + Returns None if valid, ChartGenerationError if invalid. + """ + ... + + def extract_column_refs( + self, + config: Any, + ) -> list[ColumnRef]: + """ + Extract all column references from a parsed chart config. + + Called by DatasetValidator to validate that all referenced columns exist + in the dataset. Must cover every field that holds a column name, + including filters. + + Returns a list of ColumnRef objects (may be empty). + """ + ... + + def to_form_data( + self, + config: Any, + dataset_id: int | str | None = None, + ) -> dict[str, Any]: + """ + Map a parsed chart config to Superset's internal form_data dict. + + Replaces the if/elif chain in chart_utils.map_config_to_form_data(). + + Returns a Superset form_data dict ready for caching and rendering. + """ + ... + + def post_map_validate( + self, + config: Any, + form_data: dict[str, Any], + dataset_id: int | str | None = None, + ) -> ChartGenerationError | None: + """ + Validate the mapped form_data after to_form_data() runs. + + Use this for cross-field constraints that can only be checked once + form_data is assembled (e.g. BigNumber trendline requires a temporal + column whose type must be verified against the dataset). + + Returns None if valid, ChartGenerationError if invalid. + """ + ... 
+ + def normalize_column_refs( + self, + config: Any, + dataset_context: Any, + ) -> Any: + """ + Return a new config with column names normalized to canonical dataset casing. + + Called by DatasetValidator.normalize_column_names(). The default + implementation (in BaseChartPlugin) returns the config unchanged; plugins + with column fields override this to fix case sensitivity mismatches. + + Returns a new config object (or the original if no normalization needed). + """ + ... + + def get_runtime_warnings( + self, + config: Any, + dataset_id: int | str, + ) -> list[str]: + """ + Return chart-type-specific runtime warnings (performance, compatibility). + + Called by RuntimeValidator to collect per-type warnings. Warnings are + informational only — they never block chart generation. The default + implementation returns an empty list; plugins override this to emit + chart-type-specific warnings (e.g. XY cardinality checks). + + Returns a list of warning message strings (may be empty). + """ + ... + + def generate_name( + self, + config: Any, + dataset_name: str | None = None, + ) -> str: + """ + Return a descriptive chart name for the given config. + + Called by chart_utils.generate_chart_name(). The name should follow + the standard format conventions documented in that function. Plugins + that do not override this return the generic fallback "Chart". + """ + ... + + def resolve_viz_type(self, config: Any) -> str: + """ + Return the Superset-internal viz_type string for this config. + + Called by chart_utils._resolve_viz_type(). The returned string must + match a registered Superset viz plugin (e.g. "echarts_timeseries_line"). + Plugins that do not override this return "unknown". + """ + ... 
+ + def schema_error_hint(self) -> "ChartGenerationError | None": Review Comment: <div> <div id="suggestion"> <div id="issue"><b>Unnecessary string-quoted type annotation</b></div> <div id="fix"> Remove the quotes around the return type annotation in the ChartTypePlugin protocol for schema_error_hint so it reads: `def schema_error_hint(self) -> ChartGenerationError | None:`. The module already has `from __future__ import annotations`, so the quoted form is valid but redundant and inconsistent with the other methods in the protocol. </div> </div> <small><i>Code Review Run #8680f9</i></small> </div> --- Should Bito avoid suggestions like this for future reviews? (<a href=https://alpha.bito.ai/home/ai-agents/review-rules>Manage Rules</a>) - [ ] Yes, avoid them ########## superset/mcp_service/chart/schemas.py: ########## @@ -480,11 +487,20 @@ def serialize_chart_object(chart: ChartLike | None) -> ChartInfo | None: # Extract structured filter information filters_info = extract_filters_from_form_data(chart_form_data) + _viz_type = getattr(chart, "viz_type", None) + try: + from superset.mcp_service.chart.registry import display_name_for_viz_type + + _display_name = display_name_for_viz_type(_viz_type) if _viz_type else None + except Exception: + _display_name = None Review Comment: <div> <div id="suggestion"> <div id="issue"><b>CWE-390: Overly Broad Exception Handler</b></div> <div id="fix"> Replace the broad `except Exception:` with `except (ImportError, AttributeError):` to avoid masking unexpected errors while still handling expected import or attribute issues. (See also: [CWE-390](https://cwe.mitre.org/data/definitions/390.html)) </div> </div> <small><i>Code Review Run #8680f9</i></small> </div> --- Should Bito avoid suggestions like this for future reviews? 
(<a href=https://alpha.bito.ai/home/ai-agents/review-rules>Manage Rules</a>) - [ ] Yes, avoid them ########## superset/mcp_service/chart/tool/update_chart.py: ########## @@ -196,6 +196,29 @@ def _validate_update_against_dataset( } ) + # Column existence + fuzzy-match validation + # (mirrors generate_chart pipeline layer 2) + from superset.mcp_service.chart.validation.dataset_validator import DatasetValidator + + is_col_valid, col_error = DatasetValidator.validate_against_dataset( + parsed_config, dataset.id Review Comment: <div> <div id="suggestion"> <div id="issue"><b>Redundant DB query in validation</b></div> <div id="fix"> Lines 203-204 call `DatasetValidator.validate_against_dataset(parsed_config, dataset.id)` which internally invokes `_get_dataset_context(dataset_id)` to fetch the dataset from the database. However, `_validate_update_against_dataset` already has the dataset available from lines 178-180. This causes a redundant database query per update operation. Pass the pre-fetched dataset via the `dataset_context` parameter to eliminate the duplicate lookup. 
</div> <details> <summary> <b>Code suggestion</b> </summary> <blockquote>Check the AI-generated fix before applying</blockquote> <div id="code"> ``` --- superset/mcp_service/chart/tool/update_chart.py +++ superset/mcp_service/chart/tool/update_chart.py @@ -200,8 +200,14 @@ def _validate_update_against_dataset( # Column existence + fuzzy-match validation # (mirrors generate_chart pipeline layer 2) from superset.mcp_service.chart.validation.dataset_validator import DatasetValidator + from superset.mcp_service.chart.compile import build_dataset_context_from_orm + + # Reuse the already-fetched dataset to avoid a redundant DB query + dataset_context = build_dataset_context_from_orm(dataset) - is_col_valid, col_error = DatasetValidator.validate_against_dataset( - parsed_config, dataset.id + is_col_valid, col_error = DatasetValidator.validate_against_dataset( + parsed_config, dataset.id, dataset_context=dataset_context ) ``` </div> </details> </div> <small><i>Code Review Run #8680f9</i></small> </div> --- Should Bito avoid suggestions like this for future reviews? 
(<a href=https://alpha.bito.ai/home/ai-agents/review-rules>Manage Rules</a>) - [ ] Yes, avoid them ########## superset/mcp_service/chart/tool/update_chart.py: ########## @@ -390,6 +413,24 @@ async def update_chart( # noqa: C901 # config is already a typed ChartConfig | None (validated by Pydantic) parsed_config = request.config + # Normalize column case to match dataset canonical names + # (mirrors generate_chart pipeline layer 4) + chart_datasource_id = getattr(chart, "datasource_id", None) + if parsed_config is not None and chart_datasource_id is not None: + from superset.mcp_service.chart.validation.dataset_validator import ( + DatasetValidator, + NORMALIZATION_EXCEPTIONS, + ) + + try: + parsed_config = DatasetValidator.normalize_column_names( + parsed_config, chart.datasource_id + ) + except NORMALIZATION_EXCEPTIONS as e: + logger.warning( + "Column normalization failed for chart %s: %s", chart.id, e + ) Review Comment: <div> <div id="suggestion"> <div id="issue"><b>Missing test coverage for normalization</b></div> <div id="fix"> The new column normalization feature (lines 416-432) has no test coverage. Tests should verify that column names are properly normalized to match dataset canonical names and that exceptions are handled gracefully without crashing the update operation. </div> </div> <small><i>Code Review Run #8680f9</i></small> </div> --- Should Bito avoid suggestions like this for future reviews? (<a href=https://alpha.bito.ai/home/ai-agents/review-rules>Manage Rules</a>) - [ ] Yes, avoid them ########## superset/mcp_service/chart/plugins/handlebars.py: ########## @@ -0,0 +1,193 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Handlebars chart type plugin.""" + +from __future__ import annotations + +from typing import Any + +from superset.mcp_service.chart.chart_utils import ( + _handlebars_chart_what, + _summarize_filters, + map_handlebars_config, +) +from superset.mcp_service.chart.plugin import BaseChartPlugin +from superset.mcp_service.chart.schemas import ColumnRef, HandlebarsChartConfig +from superset.mcp_service.chart.validation.dataset_validator import DatasetValidator +from superset.mcp_service.common.error_schemas import ChartGenerationError + + +class HandlebarsChartPlugin(BaseChartPlugin): + """Plugin for handlebars chart type (custom HTML template charts).""" + + chart_type = "handlebars" + display_name = "Handlebars (Custom Template)" + native_viz_types = { + "handlebars": "Custom Template Chart", + } + + def pre_validate( + self, + config: dict[str, Any], + ) -> ChartGenerationError | None: + if "handlebars_template" not in config: + return ChartGenerationError( + error_type="missing_handlebars_template", + message="Handlebars chart missing required field: handlebars_template", + details=( + "Handlebars charts require a 'handlebars_template' string " + "containing Handlebars HTML template markup" + ), + suggestions=[ + "Add 'handlebars_template' with a Handlebars HTML template", + "Data is available as {{data}} array in the template", + "Example: '<ul>{{#each data}}<li>{{this.name}}: " + 
"{{this.value}}</li>{{/each}}</ul>'", + ], + error_code="MISSING_HANDLEBARS_TEMPLATE", + ) + + template = config.get("handlebars_template") + if not isinstance(template, str) or not template.strip(): + return ChartGenerationError( + error_type="invalid_handlebars_template", + message="Handlebars template must be a non-empty string", + details=( + "The 'handlebars_template' field must be a non-empty string " + "containing valid Handlebars HTML template markup" + ), + suggestions=[ + "Ensure handlebars_template is a non-empty string", + "Example: '<ul>{{#each data}}<li>{{this.name}}</li>{{/each}}</ul>'", + ], + error_code="INVALID_HANDLEBARS_TEMPLATE", + ) + + query_mode = config.get("query_mode", "aggregate") + if query_mode not in ("aggregate", "raw"): + return ChartGenerationError( + error_type="invalid_query_mode", + message="Invalid query_mode for handlebars chart", + details="query_mode must be either 'aggregate' or 'raw'", + suggestions=[ + "Use 'aggregate' for aggregated data (default)", + "Use 'raw' for individual rows", + ], + error_code="INVALID_QUERY_MODE", + ) + + if query_mode == "raw" and not config.get("columns"): + return ChartGenerationError( + error_type="missing_raw_columns", + message="Handlebars chart in 'raw' mode requires 'columns'", + details=( + "When query_mode is 'raw', you must specify which columns " + "to include in the query results" + ), + suggestions=[ + "Add 'columns': [{'name': 'column_name'}] for raw mode", + "Or use query_mode='aggregate' with 'metrics' and optional 'groupby'", # noqa: E501 + ], + error_code="MISSING_RAW_COLUMNS", + ) + + if query_mode == "aggregate" and not config.get("metrics"): + return ChartGenerationError( + error_type="missing_aggregate_metrics", + message="Handlebars chart in 'aggregate' mode requires 'metrics'", + details=( + "When query_mode is 'aggregate' (default), you must specify " + "at least one metric with an aggregate function" + ), + suggestions=[ + "Add 'metrics': [{'name': 'column', 
'aggregate': 'SUM'}]", + "Or use query_mode='raw' with 'columns' for individual rows", + ], + error_code="MISSING_AGGREGATE_METRICS", + ) + + return None Review Comment: <div> <div id="suggestion"> <div id="issue"><b>Duplicate validation with schema</b></div> <div id="fix"> The `pre_validate` method duplicates validation logic already implemented in `HandlebarsChartConfig.validate_query_fields()` (schemas.py:1055-1087). This includes checking for missing metrics in aggregate mode (line 109) and missing columns in raw mode (line 94). The schema validator also performs additional checks (mutual exclusivity, aggregate function presence) that pre_validate lacks. Having both creates maintenance divergence risk. </div> </div> <small><i>Code Review Run #8680f9</i></small> </div> --- Should Bito avoid suggestions like this for future reviews? (<a href=https://alpha.bito.ai/home/ai-agents/review-rules>Manage Rules</a>) - [ ] Yes, avoid them ########## tests/unit_tests/mcp_service/chart/validation/test_runtime_validator.py: ########## @@ -226,28 +220,15 @@ def test_validate_table_chart_skips_xy_validations(self): ], ) - # These should not be called for table charts - with ( - patch( - "superset.mcp_service.chart.validation.runtime.RuntimeValidator." - "_validate_format_compatibility" - ) as mock_format, - patch( - "superset.mcp_service.chart.validation.runtime.RuntimeValidator." - "_validate_cardinality" - ) as mock_cardinality, - patch( - "superset.mcp_service.chart.validation.runtime.RuntimeValidator." - "_validate_chart_type" - ) as mock_chart_type, - ): - # Mock chart type validator to return no warnings + # Plugin runtime dispatches to TableChartPlugin which returns no warnings. + # Chart type suggester is also stubbed to return no warnings. + with patch( + "superset.mcp_service.chart.validation.runtime.RuntimeValidator." 
+ "_validate_chart_type" + ) as mock_chart_type: mock_chart_type.return_value = ([], []) is_valid, error = RuntimeValidator.validate_runtime_issues(config, 1) - # Format and cardinality validation should not be called for table charts - mock_format.assert_not_called() - mock_cardinality.assert_not_called() assert is_valid is True assert error is None Review Comment: <div> <div id="suggestion"> <div id="issue"><b>Test coverage regression</b></div> <div id="fix"> Test `test_validate_table_chart_skips_xy_validations` no longer verifies that XY-specific validations are skipped. Original assertions `mock_format.assert_not_called()` and `mock_cardinality.assert_not_called()` were removed, weakening test coverage for the skip behavior. </div> </div> <small><i>Code Review Run #8680f9</i></small> </div> --- Should Bito avoid suggestions like this for future reviews? (<a href=https://alpha.bito.ai/home/ai-agents/review-rules>Manage Rules</a>) - [ ] Yes, avoid them ########## superset/mcp_service/chart/validation/runtime/__init__.py: ########## @@ -98,61 +85,28 @@ def validate_runtime_issues( return True, None @staticmethod - def _validate_format_compatibility(config: XYChartConfig) -> List[str]: - """Validate format-type compatibility.""" - warnings: List[str] = [] - - try: - # Import here to avoid circular imports - from .format_validator import FormatTypeValidator - - is_valid, format_warnings = ( - FormatTypeValidator.validate_format_compatibility(config) - ) - if format_warnings: - warnings.extend(format_warnings) - except ImportError: - logger.warning("Format validator not available") - except Exception as e: - logger.warning("Format validation failed: %s", e) - - return warnings - - @staticmethod - def _validate_cardinality( - config: XYChartConfig, dataset_id: int | str - ) -> Tuple[List[str], List[str]]: - """Validate cardinality issues.""" - warnings: List[str] = [] - suggestions: List[str] = [] + def _validate_plugin_runtime( + config: ChartConfig, dataset_id: int 
+ | str + ) -> List[str]: + """Delegate per-chart-type runtime warnings to the plugin registry. + Each plugin's get_runtime_warnings() method returns chart-type-specific + warnings (e.g. format/cardinality for XY). The registry dispatch removes + the previous isinstance(config, XYChartConfig) hardcoding. + """ try: - # Import here to avoid circular imports - from .cardinality_validator import CardinalityValidator - - # Determine chart type for cardinality thresholds - chart_type = config.kind if hasattr(config, "kind") else "default" - - # Check X-axis cardinality - if config.x is None: - return warnings, suggestions - is_ok, cardinality_info = CardinalityValidator.check_cardinality( - dataset_id=dataset_id, - x_column=config.x.name, - chart_type=chart_type, - group_by_column=config.group_by[0].name if config.group_by else None, - ) - - if not is_ok and cardinality_info: - warnings.extend(cardinality_info.get("warnings", [])) - suggestions.extend(cardinality_info.get("suggestions", [])) - - except ImportError: - logger.warning("Cardinality validator not available") - except Exception as e: - logger.warning("Cardinality validation failed: %s", e) - - return warnings, suggestions + from superset.mcp_service.chart.registry import get_registry + + chart_type = getattr(config, "chart_type", None) + if chart_type is None: + return [] + plugin = get_registry().get(chart_type) + if plugin is None: + return [] + return plugin.get_runtime_warnings(config, dataset_id) + except Exception as exc: Review Comment: <div> <div id="suggestion"> <div id="issue"><b>Avoid catching blind Exception</b></div> <div id="fix"> Replace the broad `except Exception` catch with specific exception types (e.g., `ImportError`, `AttributeError`, `RuntimeError`) to avoid masking unexpected errors. 
</div> <details> <summary> <b>Code suggestion</b> </summary> <blockquote>Check the AI-generated fix before applying</blockquote> <div id="code"> ````suggestion except (ImportError, AttributeError, RuntimeError) as exc: ```` </div> </details> </div> <small><i>Code Review Run #8680f9</i></small> </div> --- Should Bito avoid suggestions like this for future reviews? (<a href=https://alpha.bito.ai/home/ai-agents/review-rules>Manage Rules</a>) - [ ] Yes, avoid them -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
