This is an automated email from the ASF dual-hosted git repository.

michaelsmolina pushed a commit to branch 5.0
in repository https://gitbox.apache.org/repos/asf/superset.git

commit 59d03a3847f3cbb7a66f7456e149a8a2347a264a
Author: CharlesNkdl <[email protected]>
AuthorDate: Fri Mar 21 17:39:59 2025 +0100

    fix(excel export): big number truncation handling (#32739)

    (cherry picked from commit c0f83a7467d0782703ec534bc9e1b8b5e80a2978)
---
 superset/utils/excel.py               | 14 ++++++++++++++
 tests/unit_tests/utils/excel_tests.py | 24 ++++++++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/superset/utils/excel.py b/superset/utils/excel.py
index d34446832a..46e1a1f071 100644
--- a/superset/utils/excel.py
+++ b/superset/utils/excel.py
@@ -56,10 +56,24 @@ def df_to_excel(df: pd.DataFrame, **kwargs: Any) -> Any:
 def apply_column_types(
     df: pd.DataFrame, column_types: list[GenericDataType]
 ) -> pd.DataFrame:
+    """
+    Applies the column types to the dataframe to prepare for an excel export
+
+    :param df: The dataframe to apply the column types to
+    :param column_types: The types of the columns
+    :return: The dataframe with the column types applied
+    """
     for column, column_type in zip(df.columns, column_types, strict=False):
         if column_type == GenericDataType.NUMERIC:
             try:
                 df[column] = pd.to_numeric(df[column])
+                # if the number is too large, convert it to a string
+                # Excel does not support numbers larger than 10^15
+                df[column] = df[column].apply(
+                    lambda x: str(x)
+                    if isinstance(x, (int, float)) and abs(x) > 10**15
+                    else x
+                )
             except ValueError:
                 df[column] = df[column].astype(str)
         elif pd.api.types.is_datetime64tz_dtype(df[column]):
diff --git a/tests/unit_tests/utils/excel_tests.py b/tests/unit_tests/utils/excel_tests.py
index deb6d3d0b4..b07fe7a4f0 100644
--- a/tests/unit_tests/utils/excel_tests.py
+++ b/tests/unit_tests/utils/excel_tests.py
@@ -105,3 +105,27 @@ def test_column_data_types_with_failing_conversion():
     assert not is_numeric_dtype(df["col1"])
     assert not is_numeric_dtype(df["col2"])
     assert not is_numeric_dtype(df["col3"])
+
+
+def test_column_data_types_with_large_numeric_values():
+    df = pd.DataFrame(
+        {
+            "big_number": [
+                10**14,
+                999999999999999,
+                10**15 + 1,
+                10**16,
+                1100108628127863,
+                2**54,
+            ],
+        }
+    )
+    apply_column_types(df, [GenericDataType.NUMERIC])
+    assert df["big_number"].tolist() == [
+        100000000000000,
+        999999999999999,
+        "1000000000000001",
+        "10000000000000000",
+        "1100108628127863",
+        "18014398509481984",
+    ]
