This is an automated email from the ASF dual-hosted git repository.
elizabeth pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git
The following commit(s) were added to refs/heads/master by this push:
new c2be54c335 fix: pandas bug when data is blank on post-processing
(#20629)
c2be54c335 is described below
commit c2be54c335d14521afc1221749e8554ff23c215b
Author: Elizabeth Thompson <[email protected]>
AuthorDate: Fri Jul 8 16:33:53 2022 -0700
fix: pandas bug when data is blank on post-processing (#20629)
* fix pandas bug when data is blank on post-processing
* account for multiple queries when data is blank
---
superset/charts/post_processing.py | 9 +-
tests/unit_tests/charts/test_post_processing.py | 599 +++++++++++++++++++++++-
2 files changed, 605 insertions(+), 3 deletions(-)
diff --git a/superset/charts/post_processing.py
b/superset/charts/post_processing.py
index 715f465574..81d2a09b04 100644
--- a/superset/charts/post_processing.py
+++ b/superset/charts/post_processing.py
@@ -324,12 +324,17 @@ def apply_post_process(
post_processor = post_processors[viz_type]
for query in result["queries"]:
+ if query["result_format"] not in (rf.value for rf in
ChartDataResultFormat):
+ raise Exception(f"Result format {query['result_format']} not
supported")
+
+ if not query["data"]:
+ # do not try to process empty data
+ continue
+
if query["result_format"] == ChartDataResultFormat.JSON:
df = pd.DataFrame.from_dict(query["data"])
elif query["result_format"] == ChartDataResultFormat.CSV:
df = pd.read_csv(StringIO(query["data"]))
- else:
- raise Exception(f"Result format {query['result_format']} not
supported")
processed_df = post_processor(df, form_data, datasource)
diff --git a/tests/unit_tests/charts/test_post_processing.py
b/tests/unit_tests/charts/test_post_processing.py
index f8586be1ff..05d3fe6e0b 100644
--- a/tests/unit_tests/charts/test_post_processing.py
+++ b/tests/unit_tests/charts/test_post_processing.py
@@ -15,9 +15,15 @@
# specific language governing permissions and limitations
# under the License.
+import json
+
import pandas as pd
+from numpy import True_
+from pytest import raises
-from superset.charts.post_processing import pivot_df, table
+from superset.charts.post_processing import apply_post_process, pivot_df, table
+from superset.common.chart_data import ChartDataResultFormat
+from superset.utils.core import GenericDataType
def test_pivot_df_no_cols_no_rows_single_metric():
@@ -1363,3 +1369,594 @@ def test_table():
| 0 | 80,679,663 |
""".strip()
)
+
+
+def test_apply_post_process_no_form_invalid_viz_type():
+ """
+ Test with invalid viz type. It should just return the result
+ """
+ result = {"foo": "bar"}
+ form_data = {"viz_type": "baz"}
+ assert apply_post_process(result, form_data) == result
+
+
+def test_apply_post_process_without_result_format():
+ """
+ A query without result_format should raise an exception
+ """
+ result = {"queries": [{"result_format": "foo"}]}
+ form_data = {"viz_type": "pivot_table"}
+
+ with raises(Exception) as ex:
+ apply_post_process(result, form_data)
+
+ assert ex.match("Result format foo not supported") == True
+
+
+def test_apply_post_process_json_format():
+ """
+ It should be able to process json results
+ """
+
+ result = {
+ "queries": [
+ {
+ "result_format": ChartDataResultFormat.JSON,
+ "data": {
+ "result": [
+ {
+ "data": [{"COUNT(is_software_dev)": 4725}],
+ "colnames": ["COUNT(is_software_dev)"],
+ "coltypes": [0],
+ }
+ ]
+ },
+ }
+ ]
+ }
+ form_data = {
+ "datasource": "19__table",
+ "viz_type": "pivot_table_v2",
+ "slice_id": 69,
+ "url_params": {},
+ "granularity_sqla": "time_start",
+ "time_grain_sqla": "P1D",
+ "time_range": "No filter",
+ "groupbyColumns": [],
+ "groupbyRows": [],
+ "metrics": [
+ {
+ "aggregate": "COUNT",
+ "column": {
+ "column_name": "is_software_dev",
+ "description": None,
+ "expression": None,
+ "filterable": True,
+ "groupby": True,
+ "id": 1463,
+ "is_dttm": False,
+ "python_date_format": None,
+ "type": "DOUBLE PRECISION",
+ "verbose_name": None,
+ },
+ "expressionType": "SIMPLE",
+ "hasCustomLabel": False,
+ "isNew": False,
+ "label": "COUNT(is_software_dev)",
+ "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
+ "sqlExpression": None,
+ }
+ ],
+ "metricsLayout": "COLUMNS",
+ "adhoc_filters": [
+ {
+ "clause": "WHERE",
+ "comparator": "Currently A Developer",
+ "expressionType": "SIMPLE",
+ "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
+ "isExtra": False,
+ "isNew": False,
+ "operator": "==",
+ "sqlExpression": None,
+ "subject": "developer_type",
+ }
+ ],
+ "row_limit": 10000,
+ "order_desc": True,
+ "aggregateFunction": "Sum",
+ "valueFormat": "SMART_NUMBER",
+ "date_format": "smart_date",
+ "rowOrder": "key_a_to_z",
+ "colOrder": "key_a_to_z",
+ "extra_form_data": {},
+ "force": False,
+ "result_format": "json",
+ "result_type": "results",
+ }
+
+ assert apply_post_process(result, form_data) == {
+ "queries": [
+ {
+ "result_format": ChartDataResultFormat.JSON,
+ "data": {
+ "result": {
+ "Total (Sum)": {
+ "data": [{"COUNT(is_software_dev)": 4725}],
+ "colnames": ["COUNT(is_software_dev)"],
+ "coltypes": [0],
+ }
+ }
+ },
+ "colnames": [("result",)],
+ "indexnames": [("Total (Sum)",)],
+ "coltypes": [GenericDataType.STRING],
+ "rowcount": 1,
+ }
+ ]
+ }
+
+
+def test_apply_post_process_csv_format():
+ """
+ It should be able to process csv results
+ """
+
+ result = {
+ "queries": [
+ {
+ "result_format": ChartDataResultFormat.CSV,
+ "data": """
+COUNT(is_software_dev)
+4725
+""",
+ }
+ ]
+ }
+ form_data = {
+ "datasource": "19__table",
+ "viz_type": "pivot_table_v2",
+ "slice_id": 69,
+ "url_params": {},
+ "granularity_sqla": "time_start",
+ "time_grain_sqla": "P1D",
+ "time_range": "No filter",
+ "groupbyColumns": [],
+ "groupbyRows": [],
+ "metrics": [
+ {
+ "aggregate": "COUNT",
+ "column": {
+ "column_name": "is_software_dev",
+ "description": None,
+ "expression": None,
+ "filterable": True,
+ "groupby": True,
+ "id": 1463,
+ "is_dttm": False,
+ "python_date_format": None,
+ "type": "DOUBLE PRECISION",
+ "verbose_name": None,
+ },
+ "expressionType": "SIMPLE",
+ "hasCustomLabel": False,
+ "isNew": False,
+ "label": "COUNT(is_software_dev)",
+ "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
+ "sqlExpression": None,
+ }
+ ],
+ "metricsLayout": "COLUMNS",
+ "adhoc_filters": [
+ {
+ "clause": "WHERE",
+ "comparator": "Currently A Developer",
+ "expressionType": "SIMPLE",
+ "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
+ "isExtra": False,
+ "isNew": False,
+ "operator": "==",
+ "sqlExpression": None,
+ "subject": "developer_type",
+ }
+ ],
+ "row_limit": 10000,
+ "order_desc": True,
+ "aggregateFunction": "Sum",
+ "valueFormat": "SMART_NUMBER",
+ "date_format": "smart_date",
+ "rowOrder": "key_a_to_z",
+ "colOrder": "key_a_to_z",
+ "extra_form_data": {},
+ "force": False,
+ "result_format": "json",
+ "result_type": "results",
+ }
+
+ assert apply_post_process(result, form_data) == {
+ "queries": [
+ {
+ "result_format": ChartDataResultFormat.CSV,
+ "data": ",COUNT(is_software_dev)\nTotal (Sum),4725\n",
+ "colnames": [("COUNT(is_software_dev)",)],
+ "indexnames": [("Total (Sum)",)],
+ "coltypes": [GenericDataType.NUMERIC],
+ "rowcount": 1,
+ }
+ ]
+ }
+
+
+def test_apply_post_process_csv_format_empty_string():
+ """
+ It should be able to process csv results with no data
+ """
+
+ result = {"queries": [{"result_format": ChartDataResultFormat.CSV, "data":
""}]}
+ form_data = {
+ "datasource": "19__table",
+ "viz_type": "pivot_table_v2",
+ "slice_id": 69,
+ "url_params": {},
+ "granularity_sqla": "time_start",
+ "time_grain_sqla": "P1D",
+ "time_range": "No filter",
+ "groupbyColumns": [],
+ "groupbyRows": [],
+ "metrics": [
+ {
+ "aggregate": "COUNT",
+ "column": {
+ "column_name": "is_software_dev",
+ "description": None,
+ "expression": None,
+ "filterable": True,
+ "groupby": True,
+ "id": 1463,
+ "is_dttm": False,
+ "python_date_format": None,
+ "type": "DOUBLE PRECISION",
+ "verbose_name": None,
+ },
+ "expressionType": "SIMPLE",
+ "hasCustomLabel": False,
+ "isNew": False,
+ "label": "COUNT(is_software_dev)",
+ "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
+ "sqlExpression": None,
+ }
+ ],
+ "metricsLayout": "COLUMNS",
+ "adhoc_filters": [
+ {
+ "clause": "WHERE",
+ "comparator": "Currently A Developer",
+ "expressionType": "SIMPLE",
+ "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
+ "isExtra": False,
+ "isNew": False,
+ "operator": "==",
+ "sqlExpression": None,
+ "subject": "developer_type",
+ }
+ ],
+ "row_limit": 10000,
+ "order_desc": True,
+ "aggregateFunction": "Sum",
+ "valueFormat": "SMART_NUMBER",
+ "date_format": "smart_date",
+ "rowOrder": "key_a_to_z",
+ "colOrder": "key_a_to_z",
+ "extra_form_data": {},
+ "force": False,
+ "result_format": "json",
+ "result_type": "results",
+ }
+
+ assert apply_post_process(result, form_data) == {
+ "queries": [{"result_format": ChartDataResultFormat.CSV, "data": ""}]
+ }
+
+
+def test_apply_post_process_csv_format_no_data():
+ """
+ It should be able to process csv results with no data
+ """
+
+ result = {"queries": [{"result_format": ChartDataResultFormat.CSV, "data":
None}]}
+ form_data = {
+ "datasource": "19__table",
+ "viz_type": "pivot_table_v2",
+ "slice_id": 69,
+ "url_params": {},
+ "granularity_sqla": "time_start",
+ "time_grain_sqla": "P1D",
+ "time_range": "No filter",
+ "groupbyColumns": [],
+ "groupbyRows": [],
+ "metrics": [
+ {
+ "aggregate": "COUNT",
+ "column": {
+ "column_name": "is_software_dev",
+ "description": None,
+ "expression": None,
+ "filterable": True,
+ "groupby": True,
+ "id": 1463,
+ "is_dttm": False,
+ "python_date_format": None,
+ "type": "DOUBLE PRECISION",
+ "verbose_name": None,
+ },
+ "expressionType": "SIMPLE",
+ "hasCustomLabel": False,
+ "isNew": False,
+ "label": "COUNT(is_software_dev)",
+ "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
+ "sqlExpression": None,
+ }
+ ],
+ "metricsLayout": "COLUMNS",
+ "adhoc_filters": [
+ {
+ "clause": "WHERE",
+ "comparator": "Currently A Developer",
+ "expressionType": "SIMPLE",
+ "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
+ "isExtra": False,
+ "isNew": False,
+ "operator": "==",
+ "sqlExpression": None,
+ "subject": "developer_type",
+ }
+ ],
+ "row_limit": 10000,
+ "order_desc": True,
+ "aggregateFunction": "Sum",
+ "valueFormat": "SMART_NUMBER",
+ "date_format": "smart_date",
+ "rowOrder": "key_a_to_z",
+ "colOrder": "key_a_to_z",
+ "extra_form_data": {},
+ "force": False,
+ "result_format": "json",
+ "result_type": "results",
+ }
+
+ assert apply_post_process(result, form_data) == {
+ "queries": [{"result_format": ChartDataResultFormat.CSV, "data": None}]
+ }
+
+
+def test_apply_post_process_csv_format_no_data_multiple_queries():
+ """
+ It should be able to process csv results multiple queries if one query has
no data
+ """
+
+ result = {
+ "queries": [
+ {"result_format": ChartDataResultFormat.CSV, "data": ""},
+ {
+ "result_format": ChartDataResultFormat.CSV,
+ "data": """
+COUNT(is_software_dev)
+4725
+""",
+ },
+ ]
+ }
+ form_data = {
+ "datasource": "19__table",
+ "viz_type": "pivot_table_v2",
+ "slice_id": 69,
+ "url_params": {},
+ "granularity_sqla": "time_start",
+ "time_grain_sqla": "P1D",
+ "time_range": "No filter",
+ "groupbyColumns": [],
+ "groupbyRows": [],
+ "metrics": [
+ {
+ "aggregate": "COUNT",
+ "column": {
+ "column_name": "is_software_dev",
+ "description": None,
+ "expression": None,
+ "filterable": True,
+ "groupby": True,
+ "id": 1463,
+ "is_dttm": False,
+ "python_date_format": None,
+ "type": "DOUBLE PRECISION",
+ "verbose_name": None,
+ },
+ "expressionType": "SIMPLE",
+ "hasCustomLabel": False,
+ "isNew": False,
+ "label": "COUNT(is_software_dev)",
+ "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
+ "sqlExpression": None,
+ }
+ ],
+ "metricsLayout": "COLUMNS",
+ "adhoc_filters": [
+ {
+ "clause": "WHERE",
+ "comparator": "Currently A Developer",
+ "expressionType": "SIMPLE",
+ "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
+ "isExtra": False,
+ "isNew": False,
+ "operator": "==",
+ "sqlExpression": None,
+ "subject": "developer_type",
+ }
+ ],
+ "row_limit": 10000,
+ "order_desc": True,
+ "aggregateFunction": "Sum",
+ "valueFormat": "SMART_NUMBER",
+ "date_format": "smart_date",
+ "rowOrder": "key_a_to_z",
+ "colOrder": "key_a_to_z",
+ "extra_form_data": {},
+ "force": False,
+ "result_format": "json",
+ "result_type": "results",
+ }
+
+ assert apply_post_process(result, form_data) == {
+ "queries": [
+ {"result_format": ChartDataResultFormat.CSV, "data": ""},
+ {
+ "result_format": ChartDataResultFormat.CSV,
+ "data": ",COUNT(is_software_dev)\nTotal (Sum),4725\n",
+ "colnames": [("COUNT(is_software_dev)",)],
+ "indexnames": [("Total (Sum)",)],
+ "coltypes": [GenericDataType.NUMERIC],
+ "rowcount": 1,
+ },
+ ]
+ }
+
+
+def test_apply_post_process_json_format_empty_string():
+ """
+ It should be able to process json results with no data
+ """
+
+ result = {"queries": [{"result_format": ChartDataResultFormat.JSON,
"data": ""}]}
+ form_data = {
+ "datasource": "19__table",
+ "viz_type": "pivot_table_v2",
+ "slice_id": 69,
+ "url_params": {},
+ "granularity_sqla": "time_start",
+ "time_grain_sqla": "P1D",
+ "time_range": "No filter",
+ "groupbyColumns": [],
+ "groupbyRows": [],
+ "metrics": [
+ {
+ "aggregate": "COUNT",
+ "column": {
+ "column_name": "is_software_dev",
+ "description": None,
+ "expression": None,
+ "filterable": True,
+ "groupby": True,
+ "id": 1463,
+ "is_dttm": False,
+ "python_date_format": None,
+ "type": "DOUBLE PRECISION",
+ "verbose_name": None,
+ },
+ "expressionType": "SIMPLE",
+ "hasCustomLabel": False,
+ "isNew": False,
+ "label": "COUNT(is_software_dev)",
+ "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
+ "sqlExpression": None,
+ }
+ ],
+ "metricsLayout": "COLUMNS",
+ "adhoc_filters": [
+ {
+ "clause": "WHERE",
+ "comparator": "Currently A Developer",
+ "expressionType": "SIMPLE",
+ "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
+ "isExtra": False,
+ "isNew": False,
+ "operator": "==",
+ "sqlExpression": None,
+ "subject": "developer_type",
+ }
+ ],
+ "row_limit": 10000,
+ "order_desc": True,
+ "aggregateFunction": "Sum",
+ "valueFormat": "SMART_NUMBER",
+ "date_format": "smart_date",
+ "rowOrder": "key_a_to_z",
+ "colOrder": "key_a_to_z",
+ "extra_form_data": {},
+ "force": False,
+ "result_format": "json",
+ "result_type": "results",
+ }
+
+ assert apply_post_process(result, form_data) == {
+ "queries": [{"result_format": ChartDataResultFormat.JSON, "data": ""}]
+ }
+
+
+def test_apply_post_process_json_format_data_is_none():
+ """
+ It should be able to process json results with no data
+ """
+
+ result = {"queries": [{"result_format": ChartDataResultFormat.JSON,
"data": None}]}
+ form_data = {
+ "datasource": "19__table",
+ "viz_type": "pivot_table_v2",
+ "slice_id": 69,
+ "url_params": {},
+ "granularity_sqla": "time_start",
+ "time_grain_sqla": "P1D",
+ "time_range": "No filter",
+ "groupbyColumns": [],
+ "groupbyRows": [],
+ "metrics": [
+ {
+ "aggregate": "COUNT",
+ "column": {
+ "column_name": "is_software_dev",
+ "description": None,
+ "expression": None,
+ "filterable": True,
+ "groupby": True,
+ "id": 1463,
+ "is_dttm": False,
+ "python_date_format": None,
+ "type": "DOUBLE PRECISION",
+ "verbose_name": None,
+ },
+ "expressionType": "SIMPLE",
+ "hasCustomLabel": False,
+ "isNew": False,
+ "label": "COUNT(is_software_dev)",
+ "optionName": "metric_9i1kctig9yr_sizo6ihd2o",
+ "sqlExpression": None,
+ }
+ ],
+ "metricsLayout": "COLUMNS",
+ "adhoc_filters": [
+ {
+ "clause": "WHERE",
+ "comparator": "Currently A Developer",
+ "expressionType": "SIMPLE",
+ "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk",
+ "isExtra": False,
+ "isNew": False,
+ "operator": "==",
+ "sqlExpression": None,
+ "subject": "developer_type",
+ }
+ ],
+ "row_limit": 10000,
+ "order_desc": True,
+ "aggregateFunction": "Sum",
+ "valueFormat": "SMART_NUMBER",
+ "date_format": "smart_date",
+ "rowOrder": "key_a_to_z",
+ "colOrder": "key_a_to_z",
+ "extra_form_data": {},
+ "force": False,
+ "result_format": "json",
+ "result_type": "results",
+ }
+
+ assert apply_post_process(result, form_data) == {
+ "queries": [{"result_format": ChartDataResultFormat.JSON, "data":
None}]
+ }