(superset) 06/40: fix(excel export): big number truncation handling (#32739)

michaelsmolina Mon, 14 Apr 2025 10:17:55 -0700

This is an automated email from the ASF dual-hosted git repository.

michaelsmolina pushed a commit to branch 5.0
in repository https://gitbox.apache.org/repos/asf/superset.git


commit 59d03a3847f3cbb7a66f7456e149a8a2347a264a
Author: CharlesNkdl <[email protected]>
AuthorDate: Fri Mar 21 17:39:59 2025 +0100

    fix(excel export): big number truncation handling (#32739)
    
    (cherry picked from commit c0f83a7467d0782703ec534bc9e1b8b5e80a2978)
---
 superset/utils/excel.py               | 14 ++++++++++++++
 tests/unit_tests/utils/excel_tests.py | 24 ++++++++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/superset/utils/excel.py b/superset/utils/excel.py
index d34446832a..46e1a1f071 100644
--- a/superset/utils/excel.py
+++ b/superset/utils/excel.py
@@ -56,10 +56,24 @@ def df_to_excel(df: pd.DataFrame, **kwargs: Any) -> Any:
 def apply_column_types(
     df: pd.DataFrame, column_types: list[GenericDataType]
 ) -> pd.DataFrame:
+    """
+    Applies the column types to the dataframe to prepare for an excel export
+
+    :param df: The dataframe to apply the column types to
+    :param column_types: The types of the columns
+    :return: The dataframe with the column types applied
+    """
     for column, column_type in zip(df.columns, column_types, strict=False):
         if column_type == GenericDataType.NUMERIC:
             try:
                 df[column] = pd.to_numeric(df[column])
+                # if the number is too large, convert it to a string
+                # Excel does not support numbers larger than 10^15
+                df[column] = df[column].apply(
+                    lambda x: str(x)
+                    if isinstance(x, (int, float)) and abs(x) > 10**15
+                    else x
+                )
             except ValueError:
                 df[column] = df[column].astype(str)
         elif pd.api.types.is_datetime64tz_dtype(df[column]):
diff --git a/tests/unit_tests/utils/excel_tests.py b/tests/unit_tests/utils/excel_tests.py
index deb6d3d0b4..b07fe7a4f0 100644
--- a/tests/unit_tests/utils/excel_tests.py
+++ b/tests/unit_tests/utils/excel_tests.py
@@ -105,3 +105,27 @@ def test_column_data_types_with_failing_conversion():
     assert not is_numeric_dtype(df["col1"])
     assert not is_numeric_dtype(df["col2"])
     assert not is_numeric_dtype(df["col3"])
+
+
+def test_column_data_types_with_large_numeric_values():
+    df = pd.DataFrame(
+        {
+            "big_number": [
+                10**14,
+                999999999999999,
+                10**15 + 1,
+                10**16,
+                1100108628127863,
+                2**54,
+            ],
+        }
+    )
+    apply_column_types(df, [GenericDataType.NUMERIC])
+    assert df["big_number"].tolist() == [
+        100000000000000,
+        999999999999999,
+        "1000000000000001",
+        "10000000000000000",
+        "1100108628127863",
+        "18014398509481984",
+    ]

(superset) 06/40: fix(excel export): big number truncation handling (#32739)

Reply via email to