[incubator-superset] branch master updated: check for empty dataframes before processing results (#10361)

villebro Sun, 19 Jul 2020 07:21:02 -0700

This is an automated email from the ASF dual-hosted git repository.

villebro pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git



The following commit(s) were added to refs/heads/master by this push:
     new 73797b8  check for empty dataframes before processing results (#10361)
73797b8 is described below

commit 73797b8b6406ba9ca54fb603d6bd7a6c92ee4cd9
Author: bryanck <brya...@gmail.com>
AuthorDate: Sun Jul 19 07:19:30 2020 -0700

    check for empty dataframes before processing results (#10361)
    
    Co-authored-by: Bryan Keller <bkel...@netflix.com>
---
 INTHEWILD.md       |  1 +
 superset/viz.py    | 43 ++++++++++++++++++++++++++++---------------
 tests/viz_tests.py | 10 +++++++++-
 3 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/INTHEWILD.md b/INTHEWILD.md
index c3dde1f..c716221 100644
--- a/INTHEWILD.md
+++ b/INTHEWILD.md
@@ -101,6 +101,7 @@ Join our growing community!
 - [bilibili](https://www.bilibili.com) [@Moinheart]
 - [Douban](https://www.douban.com/) [@luchuan]
 - [Kuaishou](https://www.kuaishou.com/) [@zhaoyu89730105]
+- [Netflix](https://www.netflix.com/)
 - [TME QQMUSIC/WESING](https://www.tencentmusic.com/) 
 - [Zaihang](http://www.zaih.com/)
 
diff --git a/superset/viz.py b/superset/viz.py
index 6e6f86d..8bceef6 100644
--- a/superset/viz.py
+++ b/superset/viz.py
@@ -661,21 +661,19 @@ class TableViz(BaseViz):
         # Transform the data frame to adhere to the UI ordering of the columns and
         # metrics whilst simultaneously computing the percentages (via normalization)
         # for the percent metrics.
-        if not df.empty:
-            columns, percent_columns = self.columns, self.percent_columns
-            if DTTM_ALIAS in df and self.is_timeseries:
-                columns = [DTTM_ALIAS] + columns
-            df = pd.concat(
-                [
-                    df[columns],
-                    (
-                        df[percent_columns]
-                        .div(df[percent_columns].sum())
-                        .add_prefix("%")
-                    ),
-                ],
-                axis=1,
-            )
+        if df.empty:
+            return None
+
+        columns, percent_columns = self.columns, self.percent_columns
+        if DTTM_ALIAS in df and self.is_timeseries:
+            columns = [DTTM_ALIAS] + columns
+        df = pd.concat(
+            [
+                df[columns],
+                (df[percent_columns].div(df[percent_columns].sum()).add_prefix("%")),
+            ],
+            axis=1,
+        )
         return self.handle_js_int_overflow(
             dict(records=df.to_dict(orient="records"), columns=list(df.columns))
         )
@@ -852,6 +850,9 @@ class CalHeatmapViz(BaseViz):
     is_timeseries = True
 
     def get_data(self, df: pd.DataFrame) -> VizData:
+        if df.empty:
+            return None
+
         form_data = self.form_data
 
         data = {}
@@ -1074,6 +1075,8 @@ class BulletViz(NVD3Viz):
         return d
 
     def get_data(self, df: pd.DataFrame) -> VizData:
+        if df.empty:
+            return None
         df["metric"] = df[[utils.get_metric_name(self.metric)]]
         values = df["metric"].values
         return {
@@ -1665,6 +1668,8 @@ class SunburstViz(BaseViz):
     )
 
     def get_data(self, df: pd.DataFrame) -> VizData:
+        if df.empty:
+            return None
         fd = self.form_data
         cols = fd.get("groupby") or []
         cols.extend(["m1", "m2"])
@@ -1715,6 +1720,8 @@ class SankeyViz(BaseViz):
         return qry
 
     def get_data(self, df: pd.DataFrame) -> VizData:
+        if df.empty:
+            return None
         source, target = self.groupby
         (value,) = self.metric_labels
         df.rename(
@@ -1774,6 +1781,8 @@ class DirectedForceViz(BaseViz):
         return qry
 
     def get_data(self, df: pd.DataFrame) -> VizData:
+        if df.empty:
+            return None
         df.columns = ["source", "target", "value"]
         return df.to_dict(orient="records")
 
@@ -1827,6 +1836,8 @@ class CountryMapViz(BaseViz):
         return qry
 
     def get_data(self, df: pd.DataFrame) -> VizData:
+        if df.empty:
+            return None
         fd = self.form_data
         cols = [fd.get("entity")]
         metric = self.metric_labels[0]
@@ -2915,6 +2926,8 @@ class PartitionViz(NVD3TimeSeriesViz):
         ]
 
     def get_data(self, df: pd.DataFrame) -> VizData:
+        if df.empty:
+            return None
         fd = self.form_data
         groups = fd.get("groupby", [])
         time_op = fd.get("time_series_option", "not_time")
diff --git a/tests/viz_tests.py b/tests/viz_tests.py
index 8290fbf..c6d0c80 100644
--- a/tests/viz_tests.py
+++ b/tests/viz_tests.py
@@ -825,8 +825,16 @@ class TestPartitionViz(SupersetTestCase):
         )
 
     def test_get_data_calls_correct_method(self):
+        raw = {}
+        raw[DTTM_ALIAS] = [100, 200, 300, 100, 200, 300, 100, 200, 300]
+        raw["groupA"] = ["a1", "a1", "a1", "b1", "b1", "b1", "c1", "c1", "c1"]
+        raw["groupB"] = ["a2", "a2", "a2", "b2", "b2", "b2", "c2", "c2", "c2"]
+        raw["groupC"] = ["a3", "a3", "a3", "b3", "b3", "b3", "c3", "c3", "c3"]
+        raw["metric1"] = [1, 2, 3, 4, 5, 6, 7, 8, 9]
+        raw["metric2"] = [10, 20, 30, 40, 50, 60, 70, 80, 90]
+        raw["metric3"] = [100, 200, 300, 400, 500, 600, 700, 800, 900]
+        df = pd.DataFrame(raw)
         test_viz = viz.PartitionViz(Mock(), {})
-        df = Mock()
         with self.assertRaises(ValueError):
             test_viz.get_data(df)
         test_viz.levels_for = Mock(return_value=1)

[incubator-superset] branch master updated: check for empty dataframes before processing results (#10361)

Reply via email to