This is an automated email from the ASF dual-hosted git repository.

beto pushed a commit to branch fix_array_no_data
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git

commit 0717f6553a4ae4d08c32ffa9eb08e019af948d65
Author: Beto Dealmeida <[email protected]>
AuthorDate: Fri Sep 20 14:31:13 2019 +0200

    Fix no data in Presto (#8268)
    
    * Fix no data in Presto
    
    * Fix test
---
 superset/dataframe.py   |  4 +++-
 tests/dataframe_test.py | 21 +++++++++++++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/superset/dataframe.py b/superset/dataframe.py
index c173336..f82ab29 100644
--- a/superset/dataframe.py
+++ b/superset/dataframe.py
@@ -106,11 +106,13 @@ class SupersetDataFrame(object):
         self.column_names = column_names
 
         if dtype:
+            # put data in a 2D array so we can efficiently access each column;
+            # the reshape ensures the shape is 2D in case data is empty
+            array = np.array(data, dtype="object").reshape(-1, len(column_names))
             # convert each column in data into a Series of the proper dtype; we
             # need to do this because we can not specify a mixed dtype when
             # instantiating the DataFrame, and this allows us to have different
             # dtypes for each column.
-            array = np.array(data, dtype="object")
             data = {
                 column: pd.Series(array[:, i], dtype=dtype[column])
                 for i, column in enumerate(column_names)
diff --git a/tests/dataframe_test.py b/tests/dataframe_test.py
index d254d63..a698cff 100644
--- a/tests/dataframe_test.py
+++ b/tests/dataframe_test.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 import numpy as np
+import pandas as pd
 
 from superset.dataframe import dedup, SupersetDataFrame
 from superset.db_engine_specs import BaseEngineSpec
@@ -135,3 +136,23 @@ class SupersetDataFrameTestCase(SupersetTestCase):
         cursor_descr = [("ds", "timestamp", None, None, None, None, True)]
         cdf = SupersetDataFrame(data, cursor_descr, PrestoEngineSpec)
         self.assertEqual(cdf.raw_df.dtypes[0], np.dtype("<M8[ns]"))
+
+    def test_no_type_coercion(self):
+        data = [("a", 1), ("b", 2)]
+        cursor_descr = [
+            ("one", "varchar", None, None, None, None, True),
+            ("two", "integer", None, None, None, None, True),
+        ]
+        cdf = SupersetDataFrame(data, cursor_descr, PrestoEngineSpec)
+        self.assertEqual(cdf.raw_df.dtypes[0], np.dtype("O"))
+        self.assertEqual(cdf.raw_df.dtypes[1], pd.Int64Dtype())
+
+    def test_empty_data(self):
+        data = []
+        cursor_descr = [
+            ("one", "varchar", None, None, None, None, True),
+            ("two", "integer", None, None, None, None, True),
+        ]
+        cdf = SupersetDataFrame(data, cursor_descr, PrestoEngineSpec)
+        self.assertEqual(cdf.raw_df.dtypes[0], np.dtype("O"))
+        self.assertEqual(cdf.raw_df.dtypes[1], pd.Int64Dtype())

Reply via email to