This is an automated email from the ASF dual-hosted git repository.

michaelsmolina pushed a commit to branch 4.0
in repository https://gitbox.apache.org/repos/asf/superset.git

commit dc675a5d7fed10f26db6570acf25a2a739fe7d11
Author: John Bodley <[email protected]>
AuthorDate: Thu Jun 13 08:54:54 2024 -0700

    fix: Workaround for Pandas.DataFrame.to_csv bug (#28755)
    
    (cherry picked from commit 6b016da185dc659ffc0927a312ab13a485853e2f)
---
 superset/utils/csv.py                              |  2 +-
 .../utils/csv_tests.py                             | 44 ++++++++++++++--------
 2 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/superset/utils/csv.py b/superset/utils/csv.py
index 74a33187a5..0d827e991d 100644
--- a/superset/utils/csv.py
+++ b/superset/utils/csv.py
@@ -78,7 +78,7 @@ def df_to_escaped_csv(df: pd.DataFrame, **kwargs: Any) -> Any:
                 if isinstance(value, str):
                     df.at[idx, name] = escape_value(value)
 
-    return df.to_csv(**kwargs)
+    return df.to_csv(escapechar="\\", **kwargs)
 
 
 def get_chart_csv_data(
diff --git a/tests/integration_tests/utils/csv_tests.py b/tests/unit_tests/utils/csv_tests.py
similarity index 77%
rename from tests/integration_tests/utils/csv_tests.py
rename to tests/unit_tests/utils/csv_tests.py
index 38c1dd51ac..c1fbc779f0 100644
--- a/tests/integration_tests/utils/csv_tests.py
+++ b/tests/unit_tests/utils/csv_tests.py
@@ -14,7 +14,6 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-import io
 
 import pandas as pd
 import pyarrow as pa
@@ -59,24 +58,39 @@ def test_escape_value():
 
 
 def test_df_to_escaped_csv():
-    csv_rows = [
-        ["col_a", "=func()"],
-        ["-10", "=cmd|' /C calc'!A0"],
-        ["a", '""=b'],
-        [" =a", "b"],
-    ]
-    csv_str = "\n".join([",".join(row) for row in csv_rows])
-
-    df = pd.read_csv(io.StringIO(csv_str))
+    df = pd.DataFrame(
+        data={
+            "value": [
+                "a",
+                "col_a",
+                "=func()",
+                "-10",
+                "=cmd|' /C calc'!A0",
+                '""=b',
+                " =a",
+                "\x00",
+            ]
+        }
+    )
+
+    escaped_csv_str = csv.df_to_escaped_csv(
+        df,
+        encoding="utf8",
+        index=False,
+        header=False,
+    )
 
-    escaped_csv_str = csv.df_to_escaped_csv(df, encoding="utf8", index=False)
     escaped_csv_rows = [row.split(",") for row in escaped_csv_str.strip().split("\n")]
 
     assert escaped_csv_rows == [
-        ["col_a", "'=func()"],
-        ["-10", r"'=cmd\|' /C calc'!A0"],
-        ["a", "'=b"],  # pandas seems to be removing the leading ""
-        ["' =a", "b"],
+        ["a"],
+        ["col_a"],
+        ["'=func()"],
+        ["-10"],
+        [r"'=cmd\\|' /C calc'!A0"],
+        ['"\'""""=b"'],
+        ["' =a"],
+        ["\x00"],
     ]
 
     df = pa.array([1, None]).to_pandas(integer_object_nulls=True).to_frame()

Reply via email to