codeant-ai-for-open-source[bot] commented on code in PR #37367:
URL: https://github.com/apache/superset/pull/37367#discussion_r2718790454
##########
superset/security/manager.py:
##########
@@ -212,6 +213,162 @@ def freeze_value(value: Any) -> str:
return json.dumps(value, sort_keys=True)
+# -----------------------------------------------------------------------------
+# Field comparison configuration for query_context_modified()
+# -----------------------------------------------------------------------------
+
+
+class FieldMapping(TypedDict):
+ """Configuration for comparing a field between requested and stored
values."""
+
+ field: str
+ equivalent: list[str]
+ comparator: NotRequired[
+ Callable[
+ ["QueryContext", "Slice", set[str]], # query_context,
stored_chart, visible
+ bool, # True if modified (should block)
+ ]
+ ]
+
+
+def _extract_orderby_column_name(orderby_item: Any) -> str | None:
+ """
+ Extract column/metric name from an orderby tuple element.
+
+ Returns None for SQL expressions which should be blocked for security.
+ """
+ if isinstance(orderby_item, str):
+ return orderby_item
+
+ if isinstance(orderby_item, dict):
+ # Block adhoc SQL expressions - potential injection vector
+ if orderby_item.get("expressionType") == "SQL":
+ return None
+ if label := orderby_item.get("label"):
+ return label
+ if col := orderby_item.get("column"):
+ return col.get("column_name")
+
+ return None
+
+
+def _get_visible_columns(stored_chart: "Slice") -> set[str]:
+ """
+ Extract column/metric names visible in the chart.
+
+ Guest users can only sort by these columns (whitelist approach).
+ """
+ params = stored_chart.params_dict
+ visible: set[str] = set()
+
+ for col in params.get("columns") or []:
+ if isinstance(col, str):
+ visible.add(col)
+ elif isinstance(col, dict) and (label := col.get("label")):
+ visible.add(label)
+
+ for col in params.get("groupby") or []:
+ if isinstance(col, str):
+ visible.add(col)
+ elif isinstance(col, dict) and (label := col.get("label")):
+ visible.add(label)
+
+ for metric in params.get("metrics") or []:
+ if isinstance(metric, str):
+ visible.add(metric)
+ elif isinstance(metric, dict) and (label := metric.get("label")):
+ visible.add(label)
+
+ return visible
+
+
+def _orderby_whitelist_compare(
+ query_context: "QueryContext",
+ stored_chart: "Slice",
+ visible_columns: set[str],
+) -> bool:
+ """
+ Compare orderby using whitelist approach.
+
+ Allows sorting by any visible column, blocks hidden columns and SQL
expressions.
+ Returns True if modified (request should be blocked).
+ """
+ form_data = query_context.form_data or {}
+
+ # Check form_data orderby
+ for orderby_tuple in form_data.get("orderby") or []:
+ if isinstance(orderby_tuple, (list, tuple)) and len(orderby_tuple) >=
1:
+ col_name = _extract_orderby_column_name(orderby_tuple[0])
+ if col_name is None or col_name not in visible_columns:
+ return True
+
+ # Check queries orderby
+ for query in query_context.queries:
+ for orderby_tuple in getattr(query, "orderby", None) or []:
+ if isinstance(orderby_tuple, (list, tuple)) and len(orderby_tuple)
>= 1:
+ col_name = _extract_orderby_column_name(orderby_tuple[0])
+ if col_name is None or col_name not in visible_columns:
+ return True
+
+ return False
+
+
+def _default_field_compare(
+ query_context: "QueryContext",
+ stored_chart: "Slice",
+ stored_query_context: dict[str, Any] | None,
+ field_name: str,
+ equiv_fields: list[str],
+) -> bool:
+ """
+ Default comparison: requested values must be subset of stored values.
+
+ Returns True if the field was modified (should block request).
+ """
+ form_data = query_context.form_data or {}
+
+ requested_values = {
+ freeze_value(value) for value in form_data.get(field_name) or []
+ }
+ stored_values = {
+ freeze_value(value) for value in
stored_chart.params_dict.get(field_name) or []
+ }
+ if not requested_values.issubset(stored_values):
+ return True
+
+ # Compare queries in query_context
+ queries_values = {
+ freeze_value(value)
+ for query in query_context.queries
+ for value in getattr(query, field_name, []) or []
+ }
+ if stored_query_context:
+ for query in stored_query_context.get("queries") or []:
+ for equiv_field in equiv_fields:
+ stored_values.update(
+ {freeze_value(value) for value in query.get(equiv_field)
or []}
+ )
Review Comment:
**Suggestion:** Logic bug in default field comparison: the code checks that
requested values are a subset of stored values before incorporating equivalent
values from the stored query_context, which can cause false positives
(blocking) when valid requested values exist only in the stored query_context
"queries" equivalencies. Merge stored_query_context equivalents into
`stored_values` before performing the requested subset check. [logic error]
<details>
<summary><b>Severity Level:</b> Critical 🚨</summary>
```mdx
- ❌ Embedded guest dashboards blocked incorrectly.
- ⚠️ query_context_modified incorrectly flags modifications.
- ⚠️ SupersetSecurityManager.raise_for_access denies valid requests.
```
</details>
```suggestion
# Base stored values from the saved chart params
stored_values = {
freeze_value(value) for value in
stored_chart.params_dict.get(field_name) or []
}
# Incorporate equivalent values coming from stored_query_context
"queries"
# before checking requested subset to avoid false positives.
if stored_query_context:
for query in stored_query_context.get("queries") or []:
for equiv_field in equiv_fields:
stored_values.update(
{freeze_value(value) for value in query.get(equiv_field)
or []}
)
# Now ensure requested values are a subset of the (augmented) stored
values
if not requested_values.issubset(stored_values):
return True
# Compare queries in runtime query_context
queries_values = {
freeze_value(value)
for query in query_context.queries
for value in getattr(query, field_name, []) or []
}
```
<details>
<summary><b>Steps of Reproduction ✅ </b></summary>
```mdx
1. Open superset/security/manager.py and inspect `_default_field_compare` at
lines
316-355. Note the early subset check on requested_values against
stored_values (line ~336)
before stored_query_context equivalents are merged.
2. Construct a repro in a unit test that mirrors stored chart and saved
query_context:
- Create a `stored_chart` whose params_dict does NOT include the
requested value for
`field_name` (e.g., params_dict.get("columns") == []).
- Create `stored_query_context` containing `{"queries":[{"columns":
["col_from_stored_query"]}]}` where the requested value
"col_from_stored_query" exists
only inside stored_query_context queries.
- Create a `query_context` with form_data containing field_name ->
["col_from_stored_query"].
3. Call `_default_field_compare(query_context, stored_chart,
stored_query_context,
"columns", ["columns","groupby"])` (function at lines 316-355).
- Current behavior: requested_values is compared to stored_values (from
params_dict)
first and returns True (modified), causing blocking even though the value
exists in
stored_query_context queries.
4. Expected: stored_query_context's equivalent fields should be merged into
stored_values
prior to the subset check so the request is recognized as unchanged. This
bug causes
false-positive blocks in `query_context_modified`
(superset/security/manager.py:372-415)
and thus can erroneously trigger SupersetSecurityManager.raise_for_access
guest-user
denial.
```
</details>
<details>
<summary><b>Prompt for AI Agent 🤖 </b></summary>
```mdx
This is a comment left during a code review.
**Path:** superset/security/manager.py
**Line:** 333:350
**Comment:**
*Logic Error: Logic bug in default field comparison: the code checks
that requested values are a subset of stored values before incorporating
equivalent values from the stored query_context, which can cause false
positives (blocking) when valid requested values exist only in the stored
query_context "queries" equivalencies. Merge stored_query_context equivalents
into `stored_values` before performing the requested subset check.
Validate the correctness of the flagged issue. If correct, How can I resolve
this? If you propose a fix, implement it and please make it concise.
```
</details>
##########
superset/security/manager.py:
##########
@@ -212,6 +213,162 @@ def freeze_value(value: Any) -> str:
return json.dumps(value, sort_keys=True)
+# -----------------------------------------------------------------------------
+# Field comparison configuration for query_context_modified()
+# -----------------------------------------------------------------------------
+
+
+class FieldMapping(TypedDict):
+ """Configuration for comparing a field between requested and stored
values."""
+
+ field: str
+ equivalent: list[str]
+ comparator: NotRequired[
+ Callable[
+ ["QueryContext", "Slice", set[str]], # query_context,
stored_chart, visible
+ bool, # True if modified (should block)
+ ]
+ ]
+
+
+def _extract_orderby_column_name(orderby_item: Any) -> str | None:
+ """
+ Extract column/metric name from an orderby tuple element.
+
+ Returns None for SQL expressions which should be blocked for security.
+ """
+ if isinstance(orderby_item, str):
+ return orderby_item
+
+ if isinstance(orderby_item, dict):
+ # Block adhoc SQL expressions - potential injection vector
+ if orderby_item.get("expressionType") == "SQL":
+ return None
+ if label := orderby_item.get("label"):
+ return label
+ if col := orderby_item.get("column"):
+ return col.get("column_name")
+
+ return None
+
+
+def _get_visible_columns(stored_chart: "Slice") -> set[str]:
+ """
+ Extract column/metric names visible in the chart.
+
+ Guest users can only sort by these columns (whitelist approach).
+ """
+ params = stored_chart.params_dict
+ visible: set[str] = set()
+
+ for col in params.get("columns") or []:
+ if isinstance(col, str):
+ visible.add(col)
+ elif isinstance(col, dict) and (label := col.get("label")):
+ visible.add(label)
+
+ for col in params.get("groupby") or []:
+ if isinstance(col, str):
+ visible.add(col)
+ elif isinstance(col, dict) and (label := col.get("label")):
+ visible.add(label)
+
+ for metric in params.get("metrics") or []:
+ if isinstance(metric, str):
+ visible.add(metric)
+ elif isinstance(metric, dict) and (label := metric.get("label")):
+ visible.add(label)
+
+ return visible
+
+
+def _orderby_whitelist_compare(
+ query_context: "QueryContext",
+ stored_chart: "Slice",
+ visible_columns: set[str],
+) -> bool:
+ """
+ Compare orderby using whitelist approach.
+
+ Allows sorting by any visible column, blocks hidden columns and SQL
expressions.
+ Returns True if modified (request should be blocked).
+ """
+ form_data = query_context.form_data or {}
+
+ # Check form_data orderby
+ for orderby_tuple in form_data.get("orderby") or []:
+ if isinstance(orderby_tuple, (list, tuple)) and len(orderby_tuple) >=
1:
+ col_name = _extract_orderby_column_name(orderby_tuple[0])
+ if col_name is None or col_name not in visible_columns:
+ return True
+
+ # Check queries orderby
+ for query in query_context.queries:
+ for orderby_tuple in getattr(query, "orderby", None) or []:
Review Comment:
**Suggestion:** In the orderby whitelist comparator, the code assumes each
`query` in `query_context.queries` is an object with an `orderby` attribute and
uses `getattr(query, "orderby", None)`. If `query` entries are dictionaries (as
they can be when coming from JSON), this misses orderby items. Normalize access
to support both dicts and objects so orderby tuples inside dict-based queries
are validated. [logic error]
<details>
<summary><b>Severity Level:</b> Critical 🚨</summary>
```mdx
- ❌ Guest payload orderby bypasses whitelist validation.
- ⚠️ query_context_modified can miss malicious orderby.
- ⚠️ Embedded dashboards' data restrictions risk bypass.
```
</details>
```suggestion
# Check queries orderby (support both object-like and dict-like query
entries)
for query in query_context.queries:
orderby_list = getattr(query, "orderby", None)
if orderby_list is None and isinstance(query, dict):
orderby_list = query.get("orderby") or []
for orderby_tuple in orderby_list or []:
```
<details>
<summary><b>Steps of Reproduction ✅ </b></summary>
```mdx
1. Open superset/security/manager.py and locate `_orderby_whitelist_compare`
at lines
285-313; note the queries loop uses `getattr(query, "orderby", None)` only.
2. Construct a `query_context` where `query_context.queries` contains a dict
entry with an
orderby list, for example:
- queries = [{"orderby": [["unauthorized_column", "desc"]]}]
- visible_columns (from stored chart) does NOT include
"unauthorized_column".
3. Call `_orderby_whitelist_compare(query_context, stored_chart,
visible_columns)`
(function at lines 285-313).
- Current behavior: `getattr(query, "orderby", None)` returns None for
dict entries,
the code iterates over `None or []` and therefore skips validating the
dict's `orderby`
list — the unauthorized orderby bypasses the whitelist check.
4. Expected behavior: dict-style query entries should be normalized (inspect
dict keys) so
`orderby` tuples are validated. The missing normalization causes a
security/validation
bypass in `query_context_modified`, which is used by
SupersetSecurityManager.raise_for_access.
```
</details>
<details>
<summary><b>Prompt for AI Agent 🤖 </b></summary>
```mdx
This is a comment left during a code review.
**Path:** superset/security/manager.py
**Line:** 305:307
**Comment:**
*Logic Error: In the orderby whitelist comparator, the code assumes
each `query` in `query_context.queries` is an object with an `orderby`
attribute and uses `getattr(query, "orderby", None)`. If `query` entries are
dictionaries (as they can be when coming from JSON), this misses orderby items.
Normalize access to support both dicts and objects so orderby tuples inside
dict-based queries are validated.
Validate the correctness of the flagged issue. If correct, How can I resolve
this? If you propose a fix, implement it and please make it concise.
```
</details>
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]