This is an automated email from the ASF dual-hosted git repository.
michaelsmolina pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git
The following commit(s) were added to refs/heads/master by this push:
new ff025b78f3 fix(post-processing): handle missing values in cumulative operator (#26429)
ff025b78f3 is described below
commit ff025b78f35f917f9d4a959154047727d3fc46f7
Author: Ville Brofeldt <[email protected]>
AuthorDate: Tue Jan 9 03:56:19 2024 -0800
fix(post-processing): handle missing values in cumulative operator (#26429)
---
superset/utils/pandas_postprocessing/cum.py | 1 +
tests/unit_tests/fixtures/dataframes.py | 5 +++++
tests/unit_tests/pandas_postprocessing/test_cum.py | 14 ++++++++++++++
3 files changed, 20 insertions(+)
diff --git a/superset/utils/pandas_postprocessing/cum.py b/superset/utils/pandas_postprocessing/cum.py
index 128fa970f5..d3eb969f79 100644
--- a/superset/utils/pandas_postprocessing/cum.py
+++ b/superset/utils/pandas_postprocessing/cum.py
@@ -46,6 +46,7 @@ def cum(
"""
columns = columns or {}
df_cum = df.loc[:, columns.keys()]
+ df_cum = df_cum.fillna(0)
operation = "cum" + operator
if operation not in ALLOWLIST_CUMULATIVE_FUNCTIONS or not hasattr(
df_cum, operation
diff --git a/tests/unit_tests/fixtures/dataframes.py b/tests/unit_tests/fixtures/dataframes.py
index 31a275b735..e1499792cb 100644
--- a/tests/unit_tests/fixtures/dataframes.py
+++ b/tests/unit_tests/fixtures/dataframes.py
@@ -130,6 +130,11 @@ timeseries_df = DataFrame(
data={"label": ["x", "y", "z", "q"], "y": [1.0, 2.0, 3.0, 4.0]},
)
+timeseries_with_gap_df = DataFrame(
+ index=to_datetime(["2019-01-01", "2019-01-02", "2019-01-05", "2019-01-07"]),
+ data={"label": ["x", "y", "z", "q"], "y": [1.0, 2.0, None, 4.0]},
+)
+
timeseries_df2 = DataFrame(
index=to_datetime(["2019-01-01", "2019-01-02", "2019-01-05", "2019-01-07"]),
data={
diff --git a/tests/unit_tests/pandas_postprocessing/test_cum.py b/tests/unit_tests/pandas_postprocessing/test_cum.py
index 130e060252..25d7fd045f 100644
--- a/tests/unit_tests/pandas_postprocessing/test_cum.py
+++ b/tests/unit_tests/pandas_postprocessing/test_cum.py
@@ -24,6 +24,7 @@ from tests.unit_tests.fixtures.dataframes import (
multiple_metrics_df,
single_metric_df,
timeseries_df,
+ timeseries_with_gap_df,
)
from tests.unit_tests.pandas_postprocessing.utils import series_to_list
@@ -77,6 +78,19 @@ def test_cum():
)
+def test_cum_with_gap():
+ # create new column (cumsum)
+ post_df = pp.cum(
+ df=timeseries_with_gap_df,
+ columns={"y": "y2"},
+ operator="sum",
+ )
+ assert post_df.columns.tolist() == ["label", "y", "y2"]
+ assert series_to_list(post_df["label"]) == ["x", "y", "z", "q"]
+ assert series_to_list(post_df["y"]) == [1.0, 2.0, None, 4.0]
+ assert series_to_list(post_df["y2"]) == [1.0, 3.0, 3.0, 7.0]
+
+
def test_cum_after_pivot_with_single_metric():
pivot_df = pp.pivot(
df=single_metric_df,