This is an automated email from the ASF dual-hosted git repository.

elizabeth pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git


The following commit(s) were added to refs/heads/master by this push:
     new c2be54c335 fix: pandas bug when data is blank on post-processing 
(#20629)
c2be54c335 is described below

commit c2be54c335d14521afc1221749e8554ff23c215b
Author: Elizabeth Thompson <[email protected]>
AuthorDate: Fri Jul 8 16:33:53 2022 -0700

    fix: pandas bug when data is blank on post-processing (#20629)
    
    * fix pandas bug when data is blank on post-processing
    
    * account for multiple queries when data is blank
---
 superset/charts/post_processing.py              |   9 +-
 tests/unit_tests/charts/test_post_processing.py | 599 +++++++++++++++++++++++-
 2 files changed, 605 insertions(+), 3 deletions(-)

diff --git a/superset/charts/post_processing.py 
b/superset/charts/post_processing.py
index 715f465574..81d2a09b04 100644
--- a/superset/charts/post_processing.py
+++ b/superset/charts/post_processing.py
@@ -324,12 +324,17 @@ def apply_post_process(
     post_processor = post_processors[viz_type]
 
     for query in result["queries"]:
+        if query["result_format"] not in (rf.value for rf in 
ChartDataResultFormat):
+            raise Exception(f"Result format {query['result_format']} not 
supported")
+
+        if not query["data"]:
+            # do not try to process empty data
+            continue
+
         if query["result_format"] == ChartDataResultFormat.JSON:
             df = pd.DataFrame.from_dict(query["data"])
         elif query["result_format"] == ChartDataResultFormat.CSV:
             df = pd.read_csv(StringIO(query["data"]))
-        else:
-            raise Exception(f"Result format {query['result_format']} not 
supported")
 
         processed_df = post_processor(df, form_data, datasource)
 
diff --git a/tests/unit_tests/charts/test_post_processing.py 
b/tests/unit_tests/charts/test_post_processing.py
index f8586be1ff..05d3fe6e0b 100644
--- a/tests/unit_tests/charts/test_post_processing.py
+++ b/tests/unit_tests/charts/test_post_processing.py
@@ -15,9 +15,15 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import json
+
 import pandas as pd
+from numpy import True_
+from pytest import raises
 
-from superset.charts.post_processing import pivot_df, table
+from superset.charts.post_processing import apply_post_process, pivot_df, table
+from superset.common.chart_data import ChartDataResultFormat
+from superset.utils.core import GenericDataType
 
 
 def test_pivot_df_no_cols_no_rows_single_metric():
@@ -1363,3 +1369,594 @@ def test_table():
 |  0 | 80,679,663 |
     """.strip()
     )
+
+
+def test_apply_post_process_no_form_invalid_viz_type():
+    """
+    Test with invalid viz type. It should just return the result
+    """
+    result = {"foo": "bar"}
+    form_data = {"viz_type": "baz"}
+    assert apply_post_process(result, form_data) == result
+
+
+def test_apply_post_process_without_result_format():
+    """
+    A query without result_format should raise an exception
+    """
+    result = {"queries": [{"result_format": "foo"}]}
+    form_data = {"viz_type": "pivot_table"}
+
+    with raises(Exception) as ex:
+        apply_post_process(result, form_data)
+
+    assert ex.match("Result format foo not supported") == True
+
+
+def test_apply_post_process_json_format():
+    """
+    It should be able to process json results
+    """
+
+    result = {
+        "queries": [
+            {
+                "result_format": ChartDataResultFormat.JSON,
+                "data": {
+                    "result": [
+                        {
+                            "data": [{"COUNT(is_software_dev)": 4725}],
+                            "colnames": ["COUNT(is_software_dev)"],
+                            "coltypes": [0],
+                        }
+                    ]
+                },
+            }
+        ]
+    }
+    form_data = {
+        "datasource": "19__table",
+        "viz_type": "pivot_table_v2",
+        "slice_id": 69,
+        "url_params": {},
+        "granularity_sqla": "time_start",
+        "time_grain_sqla": "P1D",
+        "time_range": "No filter",
+        "groupbyColumns": [],
+        "groupbyRows": [],
+        "metrics": [
+            {
+                "aggregate": "COUNT",
+                "column": {
+                    "column_name": "is_software_dev",
+                    "description": None,
+                    "expression": None,
+                    "filterable": True,
+                    "groupby": True,
+                    "id": 1463,
+                    "is_dttm": False,
+                    "python_date_format": None,
+                    "type": "DOUBLE PRECISION",
+                    "verbose_name": None,
+                },
+                "expressionType": "SIMPLE",
+                "hasCustomLabel": False,
+                "isNew": False,
+                "label": "COUNT(is_software_dev)",
+                "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
+                "sqlExpression": None,
+            }
+        ],
+        "metricsLayout": "COLUMNS",
+        "adhoc_filters": [
+            {
+                "clause": "WHERE",
+                "comparator": "Currently A Developer",
+                "expressionType": "SIMPLE",
+                "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
+                "isExtra": False,
+                "isNew": False,
+                "operator": "==",
+                "sqlExpression": None,
+                "subject": "developer_type",
+            }
+        ],
+        "row_limit": 10000,
+        "order_desc": True,
+        "aggregateFunction": "Sum",
+        "valueFormat": "SMART_NUMBER",
+        "date_format": "smart_date",
+        "rowOrder": "key_a_to_z",
+        "colOrder": "key_a_to_z",
+        "extra_form_data": {},
+        "force": False,
+        "result_format": "json",
+        "result_type": "results",
+    }
+
+    assert apply_post_process(result, form_data) == {
+        "queries": [
+            {
+                "result_format": ChartDataResultFormat.JSON,
+                "data": {
+                    "result": {
+                        "Total (Sum)": {
+                            "data": [{"COUNT(is_software_dev)": 4725}],
+                            "colnames": ["COUNT(is_software_dev)"],
+                            "coltypes": [0],
+                        }
+                    }
+                },
+                "colnames": [("result",)],
+                "indexnames": [("Total (Sum)",)],
+                "coltypes": [GenericDataType.STRING],
+                "rowcount": 1,
+            }
+        ]
+    }
+
+
+def test_apply_post_process_csv_format():
+    """
+    It should be able to process csv results
+    """
+
+    result = {
+        "queries": [
+            {
+                "result_format": ChartDataResultFormat.CSV,
+                "data": """
+COUNT(is_software_dev)
+4725
+""",
+            }
+        ]
+    }
+    form_data = {
+        "datasource": "19__table",
+        "viz_type": "pivot_table_v2",
+        "slice_id": 69,
+        "url_params": {},
+        "granularity_sqla": "time_start",
+        "time_grain_sqla": "P1D",
+        "time_range": "No filter",
+        "groupbyColumns": [],
+        "groupbyRows": [],
+        "metrics": [
+            {
+                "aggregate": "COUNT",
+                "column": {
+                    "column_name": "is_software_dev",
+                    "description": None,
+                    "expression": None,
+                    "filterable": True,
+                    "groupby": True,
+                    "id": 1463,
+                    "is_dttm": False,
+                    "python_date_format": None,
+                    "type": "DOUBLE PRECISION",
+                    "verbose_name": None,
+                },
+                "expressionType": "SIMPLE",
+                "hasCustomLabel": False,
+                "isNew": False,
+                "label": "COUNT(is_software_dev)",
+                "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
+                "sqlExpression": None,
+            }
+        ],
+        "metricsLayout": "COLUMNS",
+        "adhoc_filters": [
+            {
+                "clause": "WHERE",
+                "comparator": "Currently A Developer",
+                "expressionType": "SIMPLE",
+                "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
+                "isExtra": False,
+                "isNew": False,
+                "operator": "==",
+                "sqlExpression": None,
+                "subject": "developer_type",
+            }
+        ],
+        "row_limit": 10000,
+        "order_desc": True,
+        "aggregateFunction": "Sum",
+        "valueFormat": "SMART_NUMBER",
+        "date_format": "smart_date",
+        "rowOrder": "key_a_to_z",
+        "colOrder": "key_a_to_z",
+        "extra_form_data": {},
+        "force": False,
+        "result_format": "json",
+        "result_type": "results",
+    }
+
+    assert apply_post_process(result, form_data) == {
+        "queries": [
+            {
+                "result_format": ChartDataResultFormat.CSV,
+                "data": ",COUNT(is_software_dev)\nTotal (Sum),4725\n",
+                "colnames": [("COUNT(is_software_dev)",)],
+                "indexnames": [("Total (Sum)",)],
+                "coltypes": [GenericDataType.NUMERIC],
+                "rowcount": 1,
+            }
+        ]
+    }
+
+
+def test_apply_post_process_csv_format_empty_string():
+    """
+    It should be able to process csv results with no data
+    """
+
+    result = {"queries": [{"result_format": ChartDataResultFormat.CSV, "data": 
""}]}
+    form_data = {
+        "datasource": "19__table",
+        "viz_type": "pivot_table_v2",
+        "slice_id": 69,
+        "url_params": {},
+        "granularity_sqla": "time_start",
+        "time_grain_sqla": "P1D",
+        "time_range": "No filter",
+        "groupbyColumns": [],
+        "groupbyRows": [],
+        "metrics": [
+            {
+                "aggregate": "COUNT",
+                "column": {
+                    "column_name": "is_software_dev",
+                    "description": None,
+                    "expression": None,
+                    "filterable": True,
+                    "groupby": True,
+                    "id": 1463,
+                    "is_dttm": False,
+                    "python_date_format": None,
+                    "type": "DOUBLE PRECISION",
+                    "verbose_name": None,
+                },
+                "expressionType": "SIMPLE",
+                "hasCustomLabel": False,
+                "isNew": False,
+                "label": "COUNT(is_software_dev)",
+                "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
+                "sqlExpression": None,
+            }
+        ],
+        "metricsLayout": "COLUMNS",
+        "adhoc_filters": [
+            {
+                "clause": "WHERE",
+                "comparator": "Currently A Developer",
+                "expressionType": "SIMPLE",
+                "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
+                "isExtra": False,
+                "isNew": False,
+                "operator": "==",
+                "sqlExpression": None,
+                "subject": "developer_type",
+            }
+        ],
+        "row_limit": 10000,
+        "order_desc": True,
+        "aggregateFunction": "Sum",
+        "valueFormat": "SMART_NUMBER",
+        "date_format": "smart_date",
+        "rowOrder": "key_a_to_z",
+        "colOrder": "key_a_to_z",
+        "extra_form_data": {},
+        "force": False,
+        "result_format": "json",
+        "result_type": "results",
+    }
+
+    assert apply_post_process(result, form_data) == {
+        "queries": [{"result_format": ChartDataResultFormat.CSV, "data": ""}]
+    }
+
+
+def test_apply_post_process_csv_format_no_data():
+    """
+    It should be able to process csv results with no data
+    """
+
+    result = {"queries": [{"result_format": ChartDataResultFormat.CSV, "data": 
None}]}
+    form_data = {
+        "datasource": "19__table",
+        "viz_type": "pivot_table_v2",
+        "slice_id": 69,
+        "url_params": {},
+        "granularity_sqla": "time_start",
+        "time_grain_sqla": "P1D",
+        "time_range": "No filter",
+        "groupbyColumns": [],
+        "groupbyRows": [],
+        "metrics": [
+            {
+                "aggregate": "COUNT",
+                "column": {
+                    "column_name": "is_software_dev",
+                    "description": None,
+                    "expression": None,
+                    "filterable": True,
+                    "groupby": True,
+                    "id": 1463,
+                    "is_dttm": False,
+                    "python_date_format": None,
+                    "type": "DOUBLE PRECISION",
+                    "verbose_name": None,
+                },
+                "expressionType": "SIMPLE",
+                "hasCustomLabel": False,
+                "isNew": False,
+                "label": "COUNT(is_software_dev)",
+                "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
+                "sqlExpression": None,
+            }
+        ],
+        "metricsLayout": "COLUMNS",
+        "adhoc_filters": [
+            {
+                "clause": "WHERE",
+                "comparator": "Currently A Developer",
+                "expressionType": "SIMPLE",
+                "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
+                "isExtra": False,
+                "isNew": False,
+                "operator": "==",
+                "sqlExpression": None,
+                "subject": "developer_type",
+            }
+        ],
+        "row_limit": 10000,
+        "order_desc": True,
+        "aggregateFunction": "Sum",
+        "valueFormat": "SMART_NUMBER",
+        "date_format": "smart_date",
+        "rowOrder": "key_a_to_z",
+        "colOrder": "key_a_to_z",
+        "extra_form_data": {},
+        "force": False,
+        "result_format": "json",
+        "result_type": "results",
+    }
+
+    assert apply_post_process(result, form_data) == {
+        "queries": [{"result_format": ChartDataResultFormat.CSV, "data": None}]
+    }
+
+
+def test_apply_post_process_csv_format_no_data_multiple_queries():
+    """
+    It should be able to process csv results multiple queries if one query has 
no data
+    """
+
+    result = {
+        "queries": [
+            {"result_format": ChartDataResultFormat.CSV, "data": ""},
+            {
+                "result_format": ChartDataResultFormat.CSV,
+                "data": """
+COUNT(is_software_dev)
+4725
+""",
+            },
+        ]
+    }
+    form_data = {
+        "datasource": "19__table",
+        "viz_type": "pivot_table_v2",
+        "slice_id": 69,
+        "url_params": {},
+        "granularity_sqla": "time_start",
+        "time_grain_sqla": "P1D",
+        "time_range": "No filter",
+        "groupbyColumns": [],
+        "groupbyRows": [],
+        "metrics": [
+            {
+                "aggregate": "COUNT",
+                "column": {
+                    "column_name": "is_software_dev",
+                    "description": None,
+                    "expression": None,
+                    "filterable": True,
+                    "groupby": True,
+                    "id": 1463,
+                    "is_dttm": False,
+                    "python_date_format": None,
+                    "type": "DOUBLE PRECISION",
+                    "verbose_name": None,
+                },
+                "expressionType": "SIMPLE",
+                "hasCustomLabel": False,
+                "isNew": False,
+                "label": "COUNT(is_software_dev)",
+                "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
+                "sqlExpression": None,
+            }
+        ],
+        "metricsLayout": "COLUMNS",
+        "adhoc_filters": [
+            {
+                "clause": "WHERE",
+                "comparator": "Currently A Developer",
+                "expressionType": "SIMPLE",
+                "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
+                "isExtra": False,
+                "isNew": False,
+                "operator": "==",
+                "sqlExpression": None,
+                "subject": "developer_type",
+            }
+        ],
+        "row_limit": 10000,
+        "order_desc": True,
+        "aggregateFunction": "Sum",
+        "valueFormat": "SMART_NUMBER",
+        "date_format": "smart_date",
+        "rowOrder": "key_a_to_z",
+        "colOrder": "key_a_to_z",
+        "extra_form_data": {},
+        "force": False,
+        "result_format": "json",
+        "result_type": "results",
+    }
+
+    assert apply_post_process(result, form_data) == {
+        "queries": [
+            {"result_format": ChartDataResultFormat.CSV, "data": ""},
+            {
+                "result_format": ChartDataResultFormat.CSV,
+                "data": ",COUNT(is_software_dev)\nTotal (Sum),4725\n",
+                "colnames": [("COUNT(is_software_dev)",)],
+                "indexnames": [("Total (Sum)",)],
+                "coltypes": [GenericDataType.NUMERIC],
+                "rowcount": 1,
+            },
+        ]
+    }
+
+
+def test_apply_post_process_json_format_empty_string():
+    """
+    It should be able to process json results with no data
+    """
+
+    result = {"queries": [{"result_format": ChartDataResultFormat.JSON, 
"data": ""}]}
+    form_data = {
+        "datasource": "19__table",
+        "viz_type": "pivot_table_v2",
+        "slice_id": 69,
+        "url_params": {},
+        "granularity_sqla": "time_start",
+        "time_grain_sqla": "P1D",
+        "time_range": "No filter",
+        "groupbyColumns": [],
+        "groupbyRows": [],
+        "metrics": [
+            {
+                "aggregate": "COUNT",
+                "column": {
+                    "column_name": "is_software_dev",
+                    "description": None,
+                    "expression": None,
+                    "filterable": True,
+                    "groupby": True,
+                    "id": 1463,
+                    "is_dttm": False,
+                    "python_date_format": None,
+                    "type": "DOUBLE PRECISION",
+                    "verbose_name": None,
+                },
+                "expressionType": "SIMPLE",
+                "hasCustomLabel": False,
+                "isNew": False,
+                "label": "COUNT(is_software_dev)",
+                "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
+                "sqlExpression": None,
+            }
+        ],
+        "metricsLayout": "COLUMNS",
+        "adhoc_filters": [
+            {
+                "clause": "WHERE",
+                "comparator": "Currently A Developer",
+                "expressionType": "SIMPLE",
+                "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
+                "isExtra": False,
+                "isNew": False,
+                "operator": "==",
+                "sqlExpression": None,
+                "subject": "developer_type",
+            }
+        ],
+        "row_limit": 10000,
+        "order_desc": True,
+        "aggregateFunction": "Sum",
+        "valueFormat": "SMART_NUMBER",
+        "date_format": "smart_date",
+        "rowOrder": "key_a_to_z",
+        "colOrder": "key_a_to_z",
+        "extra_form_data": {},
+        "force": False,
+        "result_format": "json",
+        "result_type": "results",
+    }
+
+    assert apply_post_process(result, form_data) == {
+        "queries": [{"result_format": ChartDataResultFormat.JSON, "data": ""}]
+    }
+
+
+def test_apply_post_process_json_format_data_is_none():
+    """
+    It should be able to process json results with no data
+    """
+
+    result = {"queries": [{"result_format": ChartDataResultFormat.JSON, 
"data": None}]}
+    form_data = {
+        "datasource": "19__table",
+        "viz_type": "pivot_table_v2",
+        "slice_id": 69,
+        "url_params": {},
+        "granularity_sqla": "time_start",
+        "time_grain_sqla": "P1D",
+        "time_range": "No filter",
+        "groupbyColumns": [],
+        "groupbyRows": [],
+        "metrics": [
+            {
+                "aggregate": "COUNT",
+                "column": {
+                    "column_name": "is_software_dev",
+                    "description": None,
+                    "expression": None,
+                    "filterable": True,
+                    "groupby": True,
+                    "id": 1463,
+                    "is_dttm": False,
+                    "python_date_format": None,
+                    "type": "DOUBLE PRECISION",
+                    "verbose_name": None,
+                },
+                "expressionType": "SIMPLE",
+                "hasCustomLabel": False,
+                "isNew": False,
+                "label": "COUNT(is_software_dev)",
+                "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
+                "sqlExpression": None,
+            }
+        ],
+        "metricsLayout": "COLUMNS",
+        "adhoc_filters": [
+            {
+                "clause": "WHERE",
+                "comparator": "Currently A Developer",
+                "expressionType": "SIMPLE",
+                "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
+                "isExtra": False,
+                "isNew": False,
+                "operator": "==",
+                "sqlExpression": None,
+                "subject": "developer_type",
+            }
+        ],
+        "row_limit": 10000,
+        "order_desc": True,
+        "aggregateFunction": "Sum",
+        "valueFormat": "SMART_NUMBER",
+        "date_format": "smart_date",
+        "rowOrder": "key_a_to_z",
+        "colOrder": "key_a_to_z",
+        "extra_form_data": {},
+        "force": False,
+        "result_format": "json",
+        "result_type": "results",
+    }
+
+    assert apply_post_process(result, form_data) == {
+        "queries": [{"result_format": ChartDataResultFormat.JSON, "data": 
None}]
+    }

Reply via email to