Copilot commented on code in PR #39915:
URL: https://github.com/apache/superset/pull/39915#discussion_r3243700078
##########
tests/unit_tests/mcp_service/chart/test_chart_schemas.py:
##########
@@ -778,3 +778,209 @@ def
test_client_warnings_discarded_even_when_server_also_warns(self) -> None:
assert len(req.sanitization_warnings) == 1
assert "chart_name" in req.sanitization_warnings[0]
assert "injected" not in req.sanitization_warnings[0]
+
+
+class TestColumnRefNameRelaxedPattern:
+ """ColumnRef.name no longer enforces a strict regex pattern.
+
+ Many valid database column names were previously rejected:
+ - Names starting with a digit (e.g. "1Q_revenue")
+ - Names with locale-specific characters
+ The field_validator sanitize_name() still blocks XSS and SQL injection.
+ """
+
+ def test_digit_prefixed_name_accepted(self) -> None:
+ """Column names starting with a digit must now be accepted."""
+ col = ColumnRef(name="1Q_revenue")
+ assert col.name == "1Q_revenue"
+
+ def test_name_with_hyphen_accepted(self) -> None:
+ """Hyphenated column names (e.g. 'order-date') must be accepted."""
+ col = ColumnRef(name="order-date")
+ assert col.name == "order-date"
+
+ def test_name_with_dot_accepted(self) -> None:
+ """Dot-qualified names (e.g. 'schema.column') must be accepted."""
+ col = ColumnRef(name="schema.column")
+ assert col.name == "schema.column"
+
+ def test_name_with_spaces_accepted(self) -> None:
+ """Column names with spaces (e.g. 'Total Revenue') must be accepted."""
+ col = ColumnRef(name="Total Revenue")
+ assert col.name == "Total Revenue"
+
+ def test_script_tag_neutralized(self) -> None:
+ """sanitize_name() neutralizes script-tag XSS via nh3.
+ Depending on nh3 version, nh3 either strips the entire script element
+ including its content (leaving empty → ValidationError) or strips only
+ the tag delimiters (leaving 'alert(1)'). Either way, no raw <script>
+ tag is stored in the column name."""
+ try:
+ col = ColumnRef(name="<script>alert(1)</script>")
+ assert "<script>" not in col.name
+ assert "alert" not in col.name # inner payload must not survive
either
+ assert col.name # sanitized result must not be empty
Review Comment:
`TestColumnRefNameRelaxedPattern.test_script_tag_neutralized` is internally
inconsistent: the docstring says nh3 may leave the inner text (e.g. "alert(1)")
after stripping tags, but the assertions require that inner payload never
survives. Since `sanitize_user_input` does not reject plain-text "alert(1)" (it
only strips tags + checks URL schemes/event-handlers/SQL patterns), this test
will fail if nh3 ever preserves script contents. Consider relaxing the
assertions to only guarantee that raw `<script>` tags are removed (and/or
assert a ValidationError for the empty-after-sanitization case), rather than
asserting the inner text can never be present.
##########
superset/mcp_service/chart/validation/schema_validator.py:
##########
@@ -525,6 +525,38 @@ def _pre_validate_mixed_timeseries_config(
return True, None
+ @staticmethod
+ def _format_single_error(err: Dict[str, Any]) -> tuple[str, str | None]:
+ """Return (detail_message, optional_suggestion) for one pydantic
error."""
+ loc_parts = [str(p) for p in err.get("loc", [])]
+ loc = " -> ".join(loc_parts)
+ msg = err.get("msg", "Validation failed")
+ err_type = err.get("type", "")
+ field = loc_parts[-1] if loc_parts else "field"
+
+ if err_type == "string_pattern_mismatch":
+ pattern = err.get("ctx", {}).get("pattern", "")
+ pattern_info = f" (required pattern: `{pattern}`)" if pattern else
""
+ return (
+ f"'{field}' value does not match the required
format{pattern_info}.",
+ "Use get_dataset_info to verify exact column names and values",
Review Comment:
`_format_single_error` assumes `err["ctx"]` is a dict (`err.get("ctx",
{}).get(...)`). If Pydantic returns `ctx=None` (or any non-dict), this will
raise an AttributeError while trying to build an enhanced validation error,
masking the original validation failure. Safer to normalize with something like
`ctx = err.get("ctx") if isinstance(err.get("ctx"), dict) else {}` before
accessing keys.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]