bito-code-review[bot] commented on code in PR #40859:
URL: https://github.com/apache/superset/pull/40859#discussion_r3376402180
##########
tests/unit_tests/charts/test_client_processing.py:
##########
@@ -2152,6 +2152,52 @@ def test_apply_client_processing_csv_format_simple_table():
}
+def test_apply_client_processing_csv_format_escapes_formula_values():
+ """
+ A value starting with a formula trigger should be escaped in the CSV
+ output, consistent with the other CSV export paths.
+ """
+
+ result = {
+ "queries": [
+ {
+ "result_format": ChartDataResultFormat.CSV,
+ "data": "is_software_dev\n=SUM(1+1)\n",
+ }
+ ]
+ }
+ form_data = {
+ "datasource": "19__table",
+ "viz_type": "table",
+ "slice_id": 69,
+ "url_params": {},
+ "granularity_sqla": "time_start",
+ "time_grain_sqla": "P1D",
+ "time_range": "No filter",
+ "groupbyColumns": [],
+ "groupbyRows": [],
+ "metrics": [],
+ "metricsLayout": "COLUMNS",
+ "adhoc_filters": [],
+ "row_limit": 10000,
+ "order_desc": True,
+ "aggregateFunction": "Sum",
+ "valueFormat": "SMART_NUMBER",
+ "date_format": "smart_date",
+ "rowOrder": "key_a_to_z",
+ "colOrder": "key_a_to_z",
+ "extra_form_data": {},
+ "force": False,
+ "result_format": "csv",
+ "result_type": "results",
+ }
+
+ processed = apply_client_processing(result, form_data)
+ # the leading "=" is neutralized with a single-quote prefix
+ assert "'=SUM(1+1)" in processed["queries"][0]["data"]
+ assert "\n=SUM(1+1)" not in processed["queries"][0]["data"]
Review Comment:
<div>
<div id="suggestion">
<div id="issue"><b>CWE-1236: Pandas 3.0 StringDtype not handled</b></div>
<div id="fix">
The test correctly validates CSV formula escaping
([CWE-1236](https://cwe.mitre.org/data/definitions/1236.html)), but
`df_to_escaped_csv` only escapes columns whose dtype is `np.dtype(object)`. In
pandas 3.0+, string columns use `StringDtype` (not object dtype), so the
escaping loop at csv.py:85 is skipped for them. Under pandas 3.0+, this test
will therefore fail until `df_to_escaped_csv` is fixed to also handle
`pd.StringDtype()` columns.
</div>
<details>
<summary>
<b>Code suggestion</b>
</summary>
<blockquote>Check the AI-generated fix before applying</blockquote>
<div id="code">
```
--- a/superset/utils/csv.py
+++ b/superset/utils/csv.py
@@ -82,7 +82,7 @@ def df_to_escaped_csv(df: pd.DataFrame, **kwargs: Any) -> Any:
# phantom rows and corrupt the output. Only string cells are reassigned, so
# the dtype of mixed object columns (e.g. nullable integers) is preserved.
for name, column in df.items():
- if column.dtype == np.dtype(object):
+ if pd.api.types.is_string_dtype(column.dtype):
for label, value in column.items():
if isinstance(value, str):
df.at[label, name] = escape_value(value)
```
</div>
</details>
</div>
<small><i>Code Review Run #a84d95</i></small>
</div>
---
Should Bito avoid suggestions like this for future reviews? (<a
href=https://alpha.bito.ai/home/ai-agents/review-rules>Manage Rules</a>)
- [ ] Yes, avoid them
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]