This is an automated email from the ASF dual-hosted git repository.

michaelsmolina pushed a commit to branch 4.0 in repository https://gitbox.apache.org/repos/asf/superset.git
commit dc675a5d7fed10f26db6570acf25a2a739fe7d11 Author: John Bodley <[email protected]> AuthorDate: Thu Jun 13 08:54:54 2024 -0700 fix: Workaround for Pandas.DataFrame.to_csv bug (#28755) (cherry picked from commit 6b016da185dc659ffc0927a312ab13a485853e2f) --- superset/utils/csv.py | 2 +- .../utils/csv_tests.py | 44 ++++++++++++++-------- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/superset/utils/csv.py b/superset/utils/csv.py index 74a33187a5..0d827e991d 100644 --- a/superset/utils/csv.py +++ b/superset/utils/csv.py @@ -78,7 +78,7 @@ def df_to_escaped_csv(df: pd.DataFrame, **kwargs: Any) -> Any: if isinstance(value, str): df.at[idx, name] = escape_value(value) - return df.to_csv(**kwargs) + return df.to_csv(escapechar="\\", **kwargs) def get_chart_csv_data( diff --git a/tests/integration_tests/utils/csv_tests.py b/tests/unit_tests/utils/csv_tests.py similarity index 77% rename from tests/integration_tests/utils/csv_tests.py rename to tests/unit_tests/utils/csv_tests.py index 38c1dd51ac..c1fbc779f0 100644 --- a/tests/integration_tests/utils/csv_tests.py +++ b/tests/unit_tests/utils/csv_tests.py @@ -14,7 +14,6 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
-import io import pandas as pd import pyarrow as pa @@ -59,24 +58,39 @@ def test_escape_value(): def test_df_to_escaped_csv(): - csv_rows = [ - ["col_a", "=func()"], - ["-10", "=cmd|' /C calc'!A0"], - ["a", '""=b'], - [" =a", "b"], - ] - csv_str = "\n".join([",".join(row) for row in csv_rows]) - - df = pd.read_csv(io.StringIO(csv_str)) + df = pd.DataFrame( + data={ + "value": [ + "a", + "col_a", + "=func()", + "-10", + "=cmd|' /C calc'!A0", + '""=b', + " =a", + "\x00", + ] + } + ) + + escaped_csv_str = csv.df_to_escaped_csv( + df, + encoding="utf8", + index=False, + header=False, + ) - escaped_csv_str = csv.df_to_escaped_csv(df, encoding="utf8", index=False) escaped_csv_rows = [row.split(",") for row in escaped_csv_str.strip().split("\n")] assert escaped_csv_rows == [ - ["col_a", "'=func()"], - ["-10", r"'=cmd\|' /C calc'!A0"], - ["a", "'=b"], # pandas seems to be removing the leading "" - ["' =a", "b"], + ["a"], + ["col_a"], + ["'=func()"], + ["-10"], + [r"'=cmd\\|' /C calc'!A0"], + ['"\'""""=b"'], + ["' =a"], + ["\x00"], ] df = pa.array([1, None]).to_pandas(integer_object_nulls=True).to_frame()
