This is an automated email from the ASF dual-hosted git repository.

beto pushed a commit to branch fix_array_no_data in repository https://gitbox.apache.org/repos/asf/incubator-superset.git
commit 0717f6553a4ae4d08c32ffa9eb08e019af948d65 Author: Beto Dealmeida <[email protected]> AuthorDate: Fri Sep 20 14:31:13 2019 +0200 Fix no data in Presto (#8268) * Fix no data in Presto * Fix test --- superset/dataframe.py | 4 +++- tests/dataframe_test.py | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/superset/dataframe.py b/superset/dataframe.py index c173336..f82ab29 100644 --- a/superset/dataframe.py +++ b/superset/dataframe.py @@ -106,11 +106,13 @@ class SupersetDataFrame(object): self.column_names = column_names if dtype: + # put data in a 2D array so we can efficiently access each column; + # the reshape ensures the shape is 2D in case data is empty + array = np.array(data, dtype="object").reshape(-1, len(column_names)) # convert each column in data into a Series of the proper dtype; we # need to do this because we can not specify a mixed dtype when # instantiating the DataFrame, and this allows us to have different # dtypes for each column. - array = np.array(data, dtype="object") data = { column: pd.Series(array[:, i], dtype=dtype[column]) for i, column in enumerate(column_names) diff --git a/tests/dataframe_test.py b/tests/dataframe_test.py index d254d63..a698cff 100644 --- a/tests/dataframe_test.py +++ b/tests/dataframe_test.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import numpy as np +import pandas as pd from superset.dataframe import dedup, SupersetDataFrame from superset.db_engine_specs import BaseEngineSpec @@ -135,3 +136,23 @@ class SupersetDataFrameTestCase(SupersetTestCase): cursor_descr = [("ds", "timestamp", None, None, None, None, True)] cdf = SupersetDataFrame(data, cursor_descr, PrestoEngineSpec) self.assertEqual(cdf.raw_df.dtypes[0], np.dtype("<M8[ns]")) + + def test_no_type_coercion(self): + data = [("a", 1), ("b", 2)] + cursor_descr = [ + ("one", "varchar", None, None, None, None, True), + ("two", "integer", None, None, None, None, True), + ] + cdf = SupersetDataFrame(data, cursor_descr, PrestoEngineSpec) + self.assertEqual(cdf.raw_df.dtypes[0], np.dtype("O")) + self.assertEqual(cdf.raw_df.dtypes[1], pd.Int64Dtype()) + + def test_empty_data(self): + data = [] + cursor_descr = [ + ("one", "varchar", None, None, None, None, True), + ("two", "integer", None, None, None, None, True), + ] + cdf = SupersetDataFrame(data, cursor_descr, PrestoEngineSpec) + self.assertEqual(cdf.raw_df.dtypes[0], np.dtype("O")) + self.assertEqual(cdf.raw_df.dtypes[1], pd.Int64Dtype())
