codeant-ai-for-open-source[bot] commented on code in PR #39922: URL: https://github.com/apache/superset/pull/39922#discussion_r3398655578
########## superset/mcp_service/chart/plugins/big_number.py: ########## @@ -0,0 +1,247 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Big number chart type plugin.""" + +from __future__ import annotations + +from typing import Any + +from superset.mcp_service.chart.chart_utils import ( + _big_number_chart_what, + _summarize_filters, + is_column_truly_temporal, + map_big_number_config, +) +from superset.mcp_service.chart.plugin import BaseChartPlugin +from superset.mcp_service.chart.schemas import BigNumberChartConfig, ColumnRef +from superset.mcp_service.chart.validation.dataset_validator import DatasetValidator +from superset.mcp_service.common.error_schemas import ChartGenerationError + + +class BigNumberChartPlugin(BaseChartPlugin): + """Plugin for big_number chart type.""" + + chart_type = "big_number" + display_name = "Big Number" + native_viz_types = { + "big_number": "Big Number with Trendline", + "big_number_total": "Big Number", + } + + def pre_validate( + self, + config: dict[str, Any], + ) -> ChartGenerationError | None: + if "metric" not in config: + return ChartGenerationError( + error_type="missing_metric", + message="Big Number chart missing required field: metric", + details=( + "Big Number charts require a 
'metric' field " + "specifying the value to display" + ), + suggestions=[ + "Add 'metric' with name and aggregate: " + "{'name': 'revenue', 'aggregate': 'SUM'}", + "The aggregate function is required (SUM, COUNT, AVG, MIN, MAX)", + "Example: {'chart_type': 'big_number', " + "'metric': {'name': 'sales', 'aggregate': 'SUM'}}", + ], + error_code="MISSING_BIG_NUMBER_METRIC", + ) + + metric = config.get("metric", {}) + if not isinstance(metric, dict): + return ChartGenerationError( + error_type="invalid_metric_type", + message="Big Number metric must be a dict with 'name' and 'aggregate'", + details=( + f"The 'metric' field must be an object, got {type(metric).__name__}" + ), + suggestions=[ + "Use a dict: {'name': 'col', 'aggregate': 'SUM'}", + "Valid aggregates: SUM, COUNT, AVG, MIN, MAX", + ], + error_code="INVALID_BIG_NUMBER_METRIC_TYPE", + ) + if metric.get("sql_expression"): + label = metric.get("label") + if not isinstance(label, str) or not label.strip(): + return ChartGenerationError( + error_type="missing_sql_metric_label", + message="SQL expression metrics require a non-empty 'label'", + details=( + "When using a custom SQL expression as the Big Number metric, " + "a human-readable 'label' string is required so Superset can " + "display the metric name." + ), + suggestions=[ + "Add 'label': e.g. 
{'sql_expression': 'SUM(a)/SUM(b)', " + "'label': 'Conversion Rate'}", + "The label must be a non-empty string", + ], + error_code="MISSING_SQL_METRIC_LABEL", + ) + elif not metric.get("aggregate") and not metric.get("saved_metric"): + return ChartGenerationError( + error_type="missing_metric_aggregate", + message=( + "Big Number metric must include an aggregate function " + "or reference a saved metric" + ), + details=( + "The metric must have an 'aggregate' field or 'saved_metric': true" + ), + suggestions=[ + "Add 'aggregate': {'name': 'col', 'aggregate': 'SUM'}", + "Or use a saved metric: {'name': 'metric', 'saved_metric': true}", + "Valid aggregates: SUM, COUNT, AVG, MIN, MAX", + ], + error_code="MISSING_BIG_NUMBER_AGGREGATE", + ) + + show_trendline = config.get("show_trendline", False) + temporal_column = config.get("temporal_column") + if show_trendline and not temporal_column: + return ChartGenerationError( + error_type="missing_temporal_column", + message="Trendline requires a temporal column", + details=( + "When 'show_trendline' is True, " + "a 'temporal_column' must be specified" + ), + suggestions=[ + "Add 'temporal_column': 'date_column_name'", + "Or set 'show_trendline': false for number only", + "Use get_dataset_info to find temporal columns", + ], + error_code="MISSING_TEMPORAL_COLUMN", + ) + + return None + + def extract_column_refs(self, config: Any) -> list[ColumnRef]: + if not isinstance(config, BigNumberChartConfig): + return [] + refs: list[ColumnRef] = [config.metric] + # temporal_column is a str field, not a ColumnRef — validate it exists + if config.temporal_column: + refs.append(ColumnRef(name=config.temporal_column)) + if config.filters: + for f in config.filters: + refs.append(ColumnRef(name=f.column)) + return refs + + def to_form_data( + self, config: Any, dataset_id: int | str | None = None + ) -> dict[str, Any]: + return map_big_number_config(config) + + def post_map_validate( + self, + config: Any, + form_data: dict[str, Any], + 
dataset_id: int | str | None = None, + ) -> ChartGenerationError | None: + """Verify the trendline temporal column is a real temporal SQL type. + + This check was previously baked into map_config_to_form_data() in + chart_utils.py as a special case. Moving it here keeps the dispatcher + clean and makes the constraint explicit and discoverable. + """ + if not isinstance(config, BigNumberChartConfig): + return None + if not (config.show_trendline and config.temporal_column): + return None + + if not is_column_truly_temporal(config.temporal_column, dataset_id): + return ChartGenerationError( + error_type="non_temporal_trendline_column", + message=( + f"Big Number trendline requires a temporal SQL column; " + f"'{config.temporal_column}' is not temporal." + ), + details=( + f"Column '{config.temporal_column}' does not have a temporal " + f"SQL type (DATE, DATETIME, TIMESTAMP). The trendline requires " + f"a true temporal column for DATE_TRUNC to work." + ), + suggestions=[ + "Use get_dataset_info to find columns with temporal SQL types", + "Set 'show_trendline': false to use any column as the metric", + "If the column contains dates stored as integers, " + "consider casting it in a virtual dataset", + ], + error_code="NON_TEMPORAL_TRENDLINE_COLUMN", + ) + + return None + + def generate_name(self, config: Any, dataset_name: str | None = None) -> str: + what = _big_number_chart_what(config) + context = _summarize_filters(getattr(config, "filters", None)) + return self._with_context(what, context) + + def resolve_viz_type(self, config: Any) -> str: + show_trendline = getattr(config, "show_trendline", False) + temporal_column = getattr(config, "temporal_column", None) + if show_trendline and temporal_column: + return "big_number" + return "big_number_total" + + def normalize_column_refs(self, config: Any, dataset_context: Any) -> Any: + config_dict = config.model_dump() + + if config_dict.get("metric"): + if config_dict["metric"].get("saved_metric"): + 
config_dict["metric"]["name"] = ( + DatasetValidator._get_canonical_metric_name( + config_dict["metric"]["name"], dataset_context + ) + ) + else: + config_dict["metric"]["name"] = ( + DatasetValidator._get_canonical_column_name( + config_dict["metric"]["name"], dataset_context + ) + ) Review Comment: **Suggestion:** Big Number supports SQL-expression metrics, which intentionally omit `name`, but normalization always canonicalizes `metric.name` in the non-saved-metric branch. For SQL metrics this triggers a runtime error and aborts normalization, so temporal/filter columns may remain unnormalized and later fail in case-sensitive datasets. Guard this branch for `sql_expression` metrics before using `name`. [null pointer] <details> <summary><b>Severity Level:</b> Major ⚠️</summary> ```mdx - ⚠️ Big Number generate_chart skips canonicalization for SQL metrics. - ⚠️ Temporal and filter columns may remain case-mismatched. - ⚠️ Case-sensitive datasets risk failures when building chart queries. ``` </details> <details> <summary><b>Steps of Reproduction ✅ </b></summary> ```mdx 1. Call the MCP `generate_chart` tool implemented in `superset/mcp_service/chart/tool/generate_chart.py:220-279` with a Big Number request whose `config` has `chart_type: "big_number"` and a SQL-expression metric, e.g. `"metric": {"sql_expression": "SUM(a)/SUM(b)", "label": "Conversion Rate"}`, plus `show_trendline: true` and a non-canonical `temporal_column` name. 2. The request enters the validation pipeline via `ValidationPipeline.validate_request_with_warnings()` in `superset/mcp_service/chart/validation/pipeline.py:90-59`, which first runs schema validation and Pydantic, producing a `BigNumberChartConfig` where `metric` is a `ColumnRef` with `sql_expression` set and `name is None` (see `ColumnRef.validate_metric_shape` in `superset/mcp_service/chart/schemas.py:88-122` and `BigNumberChartConfig.metric` in `schemas.py:389-395`). 3. 
In Layer 4 of the pipeline, `_normalize_column_names()` at `superset/mcp_service/chart/validation/pipeline.py:51-80` calls `DatasetValidator.normalize_column_names()` (`dataset_validator.py:379-386`), which looks up the `BigNumberChartPlugin` and invokes `BigNumberChartPlugin.normalize_column_refs(config, dataset_context)` at `superset/mcp_service/chart/plugins/big_number.py:207-230`. 4. Inside `normalize_column_refs`, the branch at `big_number.py:210-222` sees `config_dict["metric"]` present with `saved_metric` false and unconditionally calls `DatasetValidator._get_canonical_column_name(config_dict["metric"]["name"], dataset_context)` even for SQL metrics; because `config_dict["metric"]["name"]` is `None` for SQL-expression metrics, `_get_canonical_column_name` at `dataset_validator.py:45-63` does `column_name.lower()`, raising `AttributeError('NoneType' object has no attribute 'lower')`, which `_normalize_column_names` catches at `pipeline.py:74-81`, aborting normalization so `temporal_column` and `filters` are never canonicalized for this Big Number request. ``` </details> [Fix in Cursor](https://app.codeant.ai/fix-in-ide?tool=cursor&prompt_id=c3c4d9af31014bac9466dea91035e92f&service=github&base_url=https%3A%2F%2Fgithub.com&org=apache&repo=apache%2Fsuperset) | [Fix in VSCode Claude](https://app.codeant.ai/fix-in-ide?tool=vscode-claude&prompt_id=c3c4d9af31014bac9466dea91035e92f&service=github&base_url=https%3A%2F%2Fgithub.com&org=apache&repo=apache%2Fsuperset) *(Use Cmd/Ctrl + Click for best experience)* <details> <summary><b>Prompt for AI Agent 🤖 </b></summary> ```mdx This is a comment left during a code review. **Path:** superset/mcp_service/chart/plugins/big_number.py **Line:** 217:222 **Comment:** *Null Pointer: Big Number supports SQL-expression metrics, which intentionally omit `name`, but normalization always canonicalizes `metric.name` in the non-saved-metric branch. 
For SQL metrics this triggers a runtime error and aborts normalization, so temporal/filter columns may remain unnormalized and later fail in case-sensitive datasets. Guard this branch for `sql_expression` metrics before using `name`. Validate the correctness of the flagged issue. If correct, how can I resolve this? If you propose a fix, implement it and please make it concise. Once the fix is implemented, also check the other comments on the same PR, and ask the user whether they want to fix the rest of the comments as well. If they say yes, fetch all the comments, validate their correctness, and implement a minimal fix for each. ``` </details> <a href='https://app.codeant.ai/feedback?pr_url=https%3A%2F%2Fgithub.com%2Fapache%2Fsuperset%2Fpull%2F39922&comment_hash=9f03d89ecec542e8c7898555b839ce3765f075258bcffa581bfafc04e4a61981&reaction=like'>👍</a> | <a href='https://app.codeant.ai/feedback?pr_url=https%3A%2F%2Fgithub.com%2Fapache%2Fsuperset%2Fpull%2F39922&comment_hash=9f03d89ecec542e8c7898555b839ce3765f075258bcffa581bfafc04e4a61981&reaction=dislike'>👎</a> -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
