This is an automated email from the ASF dual-hosted git repository.

beto pushed a commit to branch resilient_database_api
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git
commit e19c9274f51b6d43a3ee91a5fc70d0128acd62e1 Author: Beto Dealmeida <[email protected]> AuthorDate: Mon Jul 27 20:45:25 2020 -0700 Handle ROW data stored as string --- superset/db_engine_specs/presto.py | 9 ++++-- tests/db_engine_specs/presto_tests.py | 59 ++++++++++++++++++++++++++++++++++- 2 files changed, 64 insertions(+), 4 deletions(-) diff --git a/superset/db_engine_specs/presto.py b/superset/db_engine_specs/presto.py index ebf8ec4..1b739bd 100644 --- a/superset/db_engine_specs/presto.py +++ b/superset/db_engine_specs/presto.py @@ -22,7 +22,7 @@ from collections import defaultdict, deque from contextlib import closing from datetime import datetime from distutils.version import StrictVersion -from typing import Any, cast, Dict, List, Optional, Tuple, TYPE_CHECKING +from typing import Any, cast, Dict, List, Optional, Tuple, TYPE_CHECKING, Union from urllib import parse import pandas as pd @@ -653,12 +653,15 @@ class PrestoEngineSpec(BaseEngineSpec): # expand columns; we append them to the left so they are added # immediately after the parent expanded = get_children(column) - to_process.extendleft((column, level) for column in expanded) + to_process.extendleft((column, level) for column in expanded[::-1]) expanded_columns.extend(expanded) # expand row objects into new columns for row in data: - for value, col in zip(row.get(name) or [], expanded): + row_data: Union[str, List] = row.get(name) or [] + if isinstance(row_data, str): + row[name] = row_data = json.loads(row_data) + for value, col in zip(row_data, expanded): row[col["name"]] = value data = [ diff --git a/tests/db_engine_specs/presto_tests.py b/tests/db_engine_specs/presto_tests.py index b57ab01..9d1d384 100644 --- a/tests/db_engine_specs/presto_tests.py +++ b/tests/db_engine_specs/presto_tests.py @@ -214,9 +214,9 @@ class TestPrestoDbEngineSpec(TestDbEngineSpec): "name": "row_column", "type": "ROW(NESTED_OBJ1 VARCHAR, NESTED_ROW ROW(NESTED_OBJ2 VARCHAR))", }, + {"name": "row_column.nested_obj1", 
"type": "VARCHAR"}, {"name": "row_column.nested_row", "type": "ROW(NESTED_OBJ2 VARCHAR)"}, {"name": "row_column.nested_row.nested_obj2", "type": "VARCHAR"}, - {"name": "row_column.nested_obj1", "type": "VARCHAR"}, ] expected_data = [ { @@ -433,3 +433,60 @@ class TestPrestoDbEngineSpec(TestDbEngineSpec): } ] self.assertEqual(formatted_cost, expected) + + @mock.patch.dict( + "superset.extensions.feature_flag_manager._feature_flags", + {"PRESTO_EXPAND_DATA": True}, + clear=True, + ) + def test_presto_expand_data_array(self): + cols = [ + {"name": "event_id", "type": "VARCHAR", "is_date": False}, + {"name": "timestamp", "type": "BIGINT", "is_date": False}, + { + "name": "user", + "type": "ROW(ID BIGINT, FIRST_NAME VARCHAR, LAST_NAME VARCHAR)", + "is_date": False, + }, + ] + data = [ + { + "event_id": "abcdef01-2345-6789-abcd-ef0123456789", + "timestamp": "1595895506219", + "user": '[1, "JOHN", "DOE"]', + } + ] + actual_cols, actual_data, actual_expanded_cols = PrestoEngineSpec.expand_data( + cols, data + ) + expected_cols = [ + {"name": "event_id", "type": "VARCHAR", "is_date": False}, + {"name": "timestamp", "type": "BIGINT", "is_date": False}, + { + "name": "user", + "type": "ROW(ID BIGINT, FIRST_NAME VARCHAR, LAST_NAME VARCHAR)", + "is_date": False, + }, + {"name": "user.id", "type": "BIGINT"}, + {"name": "user.first_name", "type": "VARCHAR"}, + {"name": "user.last_name", "type": "VARCHAR"}, + ] + expected_data = [ + { + "event_id": "abcdef01-2345-6789-abcd-ef0123456789", + "timestamp": "1595895506219", + "user": [1, "JOHN", "DOE"], + "user.id": 1, + "user.first_name": "JOHN", + "user.last_name": "DOE", + } + ] + expected_expanded_cols = [ + {"name": "user.id", "type": "BIGINT"}, + {"name": "user.first_name", "type": "VARCHAR"}, + {"name": "user.last_name", "type": "VARCHAR"}, + ] + + self.assertEqual(actual_cols, expected_cols) + self.assertEqual(actual_data, expected_data) + self.assertEqual(actual_expanded_cols, expected_expanded_cols)
