This is an automated email from the ASF dual-hosted git repository. beto pushed a commit to branch sc_73447 in repository https://gitbox.apache.org/repos/asf/superset.git
commit 4ecf3b911849d8abd457a39919d64aa91b804a59
Author: Beto Dealmeida <[email protected]>
AuthorDate: Thu Aug 10 16:14:50 2023 -0700

    fix: to_datetime in Pandas 2
---
 superset/utils/core.py              | 11 +++++++++--
 tests/unit_tests/utils/test_core.py | 30 ++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/superset/utils/core.py b/superset/utils/core.py
index cd8c62efe7..e0ebf8cb9a 100644
--- a/superset/utils/core.py
+++ b/superset/utils/core.py
@@ -1834,17 +1834,24 @@ def normalize_dttm_col(
                 # Column is formatted as a numeric value
                 unit = _col.timestamp_format.replace("epoch_", "")
                 df[_col.col_label] = pd.to_datetime(
-                    dttm_series, utc=False, unit=unit, origin="unix", errors="coerce"
+                    dttm_series,
+                    utc=False,
+                    unit=unit,
+                    origin="unix",
+                    errors="raise",
+                    exact=False,
                 )
             else:
                 # Column has already been formatted as a timestamp.
                 df[_col.col_label] = dttm_series.apply(pd.Timestamp)
         else:
+            print(_col.timestamp_format)
             df[_col.col_label] = pd.to_datetime(
                 df[_col.col_label],
                 utc=False,
                 format=_col.timestamp_format,
-                errors="coerce",
+                errors="raise",
+                exact=False,
             )
         if _col.offset:
             df[_col.col_label] += timedelta(hours=_col.offset)
diff --git a/tests/unit_tests/utils/test_core.py b/tests/unit_tests/utils/test_core.py
index 568595517c..562ebe582e 100644
--- a/tests/unit_tests/utils/test_core.py
+++ b/tests/unit_tests/utils/test_core.py
@@ -17,11 +17,14 @@
 import os
 from typing import Any, Optional
 
+import pandas as pd
 import pytest
 
 from superset.utils.core import (
     cast_to_boolean,
+    DateColumn,
     is_test,
+    normalize_dttm_col,
     parse_boolean_string,
     QueryObjectFilterClause,
     remove_extra_adhoc_filters,
@@ -171,3 +174,30 @@ def test_other_values():
     assert cast_to_boolean([]) is False
     assert cast_to_boolean({}) is False
     assert cast_to_boolean(object()) is False
+
+
+def test_normalize_dttm_col() -> None:
+    """
+    Tests for the ``normalize_dttm_col`` function.
+
+    In particular, this covers a regression when Pandas was upgraded from 1.5.3 to
+    2.0.3 and the behavior of ``pd.to_datetime`` changed.
+    """
+    df = pd.DataFrame({"__time": ["2017-07-01T00:00:00.000Z"]})
+    assert (
+        df.to_markdown()
+        == """
+|    | __time                   |
+|---:|:-------------------------|
+|  0 | 2017-07-01T00:00:00.000Z |
+    """.strip()
+    )
+
+    # in 1.5.3 this would return a datetime64[ns] dtype, but in 2.0.3 we had to
+    # add ``exact=False`` since there is a leftover after parsing the format
+    dttm_cols = (DateColumn("__time", "%Y-%m-%d"),)
+
+    # the function modifies the dataframe in place
+    normalize_dttm_col(df, dttm_cols)
+
+    assert df["__time"].astype(str).tolist() == ["2017-07-01"]
