This is an automated email from the ASF dual-hosted git repository.

beto pushed a commit to branch resilient_database_api
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git

commit e19c9274f51b6d43a3ee91a5fc70d0128acd62e1
Author: Beto Dealmeida <[email protected]>
AuthorDate: Mon Jul 27 20:45:25 2020 -0700

    Handle ROW data stored as string
---
 superset/db_engine_specs/presto.py    |  9 ++++--
 tests/db_engine_specs/presto_tests.py | 59 ++++++++++++++++++++++++++++++++++-
 2 files changed, 64 insertions(+), 4 deletions(-)

diff --git a/superset/db_engine_specs/presto.py b/superset/db_engine_specs/presto.py
index ebf8ec4..1b739bd 100644
--- a/superset/db_engine_specs/presto.py
+++ b/superset/db_engine_specs/presto.py
@@ -22,7 +22,7 @@ from collections import defaultdict, deque
 from contextlib import closing
 from datetime import datetime
 from distutils.version import StrictVersion
-from typing import Any, cast, Dict, List, Optional, Tuple, TYPE_CHECKING
+from typing import Any, cast, Dict, List, Optional, Tuple, TYPE_CHECKING, Union
 from urllib import parse
 
 import pandas as pd
@@ -653,12 +653,15 @@ class PrestoEngineSpec(BaseEngineSpec):
                 # expand columns; we append them to the left so they are added
                 # immediately after the parent
                 expanded = get_children(column)
-                to_process.extendleft((column, level) for column in expanded)
+                to_process.extendleft((column, level) for column in expanded[::-1])
                 expanded_columns.extend(expanded)
 
                 # expand row objects into new columns
                 for row in data:
-                    for value, col in zip(row.get(name) or [], expanded):
+                    row_data: Union[str, List] = row.get(name) or []
+                    if isinstance(row_data, str):
+                        row[name] = row_data = json.loads(row_data)
+                    for value, col in zip(row_data, expanded):
                         row[col["name"]] = value
 
         data = [
diff --git a/tests/db_engine_specs/presto_tests.py b/tests/db_engine_specs/presto_tests.py
index b57ab01..9d1d384 100644
--- a/tests/db_engine_specs/presto_tests.py
+++ b/tests/db_engine_specs/presto_tests.py
@@ -214,9 +214,9 @@ class TestPrestoDbEngineSpec(TestDbEngineSpec):
                 "name": "row_column",
                 "type": "ROW(NESTED_OBJ1 VARCHAR, NESTED_ROW ROW(NESTED_OBJ2 VARCHAR))",
             },
+            {"name": "row_column.nested_obj1", "type": "VARCHAR"},
             {"name": "row_column.nested_row", "type": "ROW(NESTED_OBJ2 VARCHAR)"},
             {"name": "row_column.nested_row.nested_obj2", "type": "VARCHAR"},
-            {"name": "row_column.nested_obj1", "type": "VARCHAR"},
         ]
         expected_data = [
             {
@@ -433,3 +433,60 @@ class TestPrestoDbEngineSpec(TestDbEngineSpec):
             }
         ]
         self.assertEqual(formatted_cost, expected)
+
+    @mock.patch.dict(
+        "superset.extensions.feature_flag_manager._feature_flags",
+        {"PRESTO_EXPAND_DATA": True},
+        clear=True,
+    )
+    def test_presto_expand_data_array(self):
+        cols = [
+            {"name": "event_id", "type": "VARCHAR", "is_date": False},
+            {"name": "timestamp", "type": "BIGINT", "is_date": False},
+            {
+                "name": "user",
+                "type": "ROW(ID BIGINT, FIRST_NAME VARCHAR, LAST_NAME VARCHAR)",
+                "is_date": False,
+            },
+        ]
+        data = [
+            {
+                "event_id": "abcdef01-2345-6789-abcd-ef0123456789",
+                "timestamp": "1595895506219",
+                "user": '[1, "JOHN", "DOE"]',
+            }
+        ]
+        actual_cols, actual_data, actual_expanded_cols = PrestoEngineSpec.expand_data(
+            cols, data
+        )
+        expected_cols = [
+            {"name": "event_id", "type": "VARCHAR", "is_date": False},
+            {"name": "timestamp", "type": "BIGINT", "is_date": False},
+            {
+                "name": "user",
+                "type": "ROW(ID BIGINT, FIRST_NAME VARCHAR, LAST_NAME VARCHAR)",
+                "is_date": False,
+            },
+            {"name": "user.id", "type": "BIGINT"},
+            {"name": "user.first_name", "type": "VARCHAR"},
+            {"name": "user.last_name", "type": "VARCHAR"},
+        ]
+        expected_data = [
+            {
+                "event_id": "abcdef01-2345-6789-abcd-ef0123456789",
+                "timestamp": "1595895506219",
+                "user": [1, "JOHN", "DOE"],
+                "user.id": 1,
+                "user.first_name": "JOHN",
+                "user.last_name": "DOE",
+            }
+        ]
+        expected_expanded_cols = [
+            {"name": "user.id", "type": "BIGINT"},
+            {"name": "user.first_name", "type": "VARCHAR"},
+            {"name": "user.last_name", "type": "VARCHAR"},
+        ]
+
+        self.assertEqual(actual_cols, expected_cols)
+        self.assertEqual(actual_data, expected_data)
+        self.assertEqual(actual_expanded_cols, expected_expanded_cols)

Reply via email to