This is an automated email from the ASF dual-hosted git repository.
maximebeauchemin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git
The following commit(s) were added to refs/heads/master by this push:
new 4bc5fe5 [BUGFIX]: Check datatype of results before converting to
DataFrame (#4108)
4bc5fe5 is described below
commit 4bc5fe549574a74b574a7ec101099b1455cd4a24
Author: Marcus Levine <[email protected]>
AuthorDate: Tue Jan 23 23:58:06 2018 -0500
[BUGFIX]: Check datatype of results before converting to DataFrame (#4108)
* conditional check on datatype of results before converting to df
fix type checking
fix conditional checks
remove trailing whitespace and fix df_data fallback def
actually remove trailing whitespace
generalized type check to check all columns for dict
refactor dict col check
* move df conversion to helper and add unit test
add missing newlines
another missing newline
fix quotes
more quote fixes
---
superset/sql_lab.py | 27 ++++++++++++++++++++++-----
tests/sqllab_tests.py | 17 +++++++++++++++++
2 files changed, 39 insertions(+), 5 deletions(-)
diff --git a/superset/sql_lab.py b/superset/sql_lab.py
index 63225f3..87a6b44 100644
--- a/superset/sql_lab.py
+++ b/superset/sql_lab.py
@@ -10,6 +10,7 @@ from time import sleep
import uuid
from celery.exceptions import SoftTimeLimitExceeded
+import numpy as np
import pandas as pd
import sqlalchemy
from sqlalchemy.orm import sessionmaker
@@ -85,6 +86,26 @@ def get_session(nullpool):
return session
+def convert_results_to_df(cursor_description, data):
+ """Convert raw query results to a DataFrame."""
+ column_names = (
+ [col[0] for col in cursor_description] if cursor_description else [])
+ column_names = dedup(column_names)
+
+ # check whether the result set has any nested dict columns
+ if data:
+ first_row = data[0]
+ has_dict_col = any([isinstance(c, dict) for c in first_row])
+ df_data = list(data) if has_dict_col else np.array(data)
+ else:
+ df_data = []
+
+ cdf = dataframe.SupersetDataFrame(
+ pd.DataFrame(df_data, columns=column_names))
+
+ return cdf
+
+
@celery_app.task(bind=True, soft_time_limit=SQLLAB_TIMEOUT)
def get_sql_results(
ctask, query_id, return_results=True, store_results=False,
@@ -224,11 +245,7 @@ def execute_sql(
},
default=utils.json_iso_dttm_ser)
- column_names = (
- [col[0] for col in cursor_description] if cursor_description else [])
- column_names = dedup(column_names)
- cdf = dataframe.SupersetDataFrame(
- pd.DataFrame(list(data), columns=column_names))
+ cdf = convert_results_to_df(cursor_description, data)
query.rows = cdf.size
query.progress = 100
diff --git a/tests/sqllab_tests.py b/tests/sqllab_tests.py
index 2caf4c2..53144ea 100644
--- a/tests/sqllab_tests.py
+++ b/tests/sqllab_tests.py
@@ -12,6 +12,7 @@ from flask_appbuilder.security.sqla import models as ab_models
from superset import appbuilder, db, sm, utils
from superset.models.sql_lab import Query
+from superset.sql_lab import convert_results_to_df
from .base_tests import SupersetTestCase
@@ -200,6 +201,22 @@ class SqlLabTests(SupersetTestCase):
user_name='admin',
raise_on_error=True)
+ def test_df_conversion_no_dict(self):
+ cols = [['string_col'], ['int_col']]
+ data = [['a', 4]]
+ cdf = convert_results_to_df(cols, data)
+
+ self.assertEquals(len(data), cdf.size)
+ self.assertEquals(len(cols), len(cdf.columns))
+
+ def test_df_conversion_dict(self):
+ cols = [['string_col'], ['dict_col'], ['int_col']]
+ data = [['a', {'c1': 1, 'c2': 2, 'c3': 3}, 4]]
+ cdf = convert_results_to_df(cols, data)
+
+ self.assertEquals(len(data), cdf.size)
+ self.assertEquals(len(cols), len(cdf.columns))
+
if __name__ == '__main__':
unittest.main()
--
To stop receiving notification emails like this one, please contact
[email protected].