This is an automated email from the ASF dual-hosted git repository. dpgaspar pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-superset.git
The following commit(s) were added to refs/heads/master by this push: new 2b59075 tests(engine_specs): full bigquery engine coverage (#9702) 2b59075 is described below commit 2b59075d57e2bce243fdaba4cd62cbd127a9b1be Author: Daniel Vaz Gaspar <danielvazgas...@gmail.com> AuthorDate: Thu Apr 30 18:31:15 2020 +0100 tests(engine_specs): full bigquery engine coverage (#9702) --- superset/db_engine_specs/bigquery.py | 17 +++-- tests/db_engine_specs/bigquery_tests.py | 123 ++++++++++++++++++++++++++++++++ 2 files changed, 133 insertions(+), 7 deletions(-) diff --git a/superset/db_engine_specs/bigquery.py b/superset/db_engine_specs/bigquery.py index 4f5b5d1..992b5fe 100644 --- a/superset/db_engine_specs/bigquery.py +++ b/superset/db_engine_specs/bigquery.py @@ -26,7 +26,8 @@ from sqlalchemy.sql.expression import ColumnClause from superset.db_engine_specs.base import BaseEngineSpec if TYPE_CHECKING: - from superset.models.core import Database # pylint: disable=unused-import + # pylint: disable=unused-import + from superset.models.core import Database # pragma: no cover class BigQueryEngineSpec(BaseEngineSpec): @@ -84,6 +85,8 @@ class BigQueryEngineSpec(BaseEngineSpec): @classmethod def fetch_data(cls, cursor: Any, limit: int) -> List[Tuple]: data = super().fetch_data(cursor, limit) + # Support type BigQuery Row, introduced here PR #4071 + # google.cloud.bigquery.table.Row if data and type(data[0]).__name__ == "Row": data = [r.values() for r in data] # type: ignore return data @@ -174,22 +177,22 @@ class BigQueryEngineSpec(BaseEngineSpec): `DataFrame.to_gbq()` which requires `pandas_gbq` to be installed. :param df: Dataframe with data to be uploaded - :param kwargs: kwargs to be passed to to_gbq() method. Requires both `schema - and ``name` to be present in kwargs, which are combined and passed to - `to_gbq()` as `destination_table`. + :param kwargs: kwargs to be passed to to_gbq() method. Requires that `schema`, `name` and `con` are present in kwargs.
`name` and `schema` are combined + and passed to `to_gbq()` as `destination_table`. """ try: import pandas_gbq from google.oauth2 import service_account except ImportError: raise Exception( - "Could not import the library `pandas_gbq`, which is " + "Could not import libraries `pandas_gbq` or `google.oauth2`, which are " "required to be installed in your environment in order " "to upload data to BigQuery" ) - if not ("name" in kwargs and "schema" in kwargs): - raise Exception("name and schema need to be defined in kwargs") + if not ("name" in kwargs and "schema" in kwargs and "con" in kwargs): + raise Exception("name, schema and con need to be defined in kwargs") gbq_kwargs = {} gbq_kwargs["project_id"] = kwargs["con"].engine.url.host diff --git a/tests/db_engine_specs/bigquery_tests.py b/tests/db_engine_specs/bigquery_tests.py index c9b9878..67f7747 100644 --- a/tests/db_engine_specs/bigquery_tests.py +++ b/tests/db_engine_specs/bigquery_tests.py @@ -14,14 +14,22 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License.
+import sys +import unittest.mock as mock + +from pandas import DataFrame from sqlalchemy import column +from superset.db_engine_specs.base import BaseEngineSpec from superset.db_engine_specs.bigquery import BigQueryEngineSpec from tests.db_engine_specs.base_tests import DbEngineSpecTestCase class BigQueryTestCase(DbEngineSpecTestCase): def test_bigquery_sqla_column_label(self): + """ + DB Eng Specs (bigquery): Test column label + """ test_cases = { "Col": "Col", "SUM(x)": "SUM_x__5f110", @@ -33,11 +41,16 @@ class BigQueryTestCase(DbEngineSpecTestCase): self.assertEqual(actual, expected) def test_convert_dttm(self): + """ + DB Eng Specs (bigquery): Test conversion to date time + """ dttm = self.get_dttm() test_cases = { "DATE": "CAST('2019-01-02' AS DATE)", "DATETIME": "CAST('2019-01-02T03:04:05.678900' AS DATETIME)", "TIMESTAMP": "CAST('2019-01-02T03:04:05.678900' AS TIMESTAMP)", + "TIME": "CAST('03:04:05.678900' AS TIME)", + "UNKNOWNTYPE": None, } for target_type, expected in test_cases.items(): @@ -45,6 +58,9 @@ class BigQueryTestCase(DbEngineSpecTestCase): self.assertEqual(actual, expected) def test_timegrain_expressions(self): + """ + DB Eng Specs (bigquery): Test time grain expressions + """ col = column("temporal") test_cases = { "DATE": "DATE_TRUNC(temporal, HOUR)", @@ -57,3 +73,110 @@ class BigQueryTestCase(DbEngineSpecTestCase): col=col, pdf=None, time_grain="PT1H", type_=type_ ) self.assertEqual(str(actual), expected) + + def test_fetch_data(self): + """ + DB Eng Specs (bigquery): Test fetch data + """ + # Mock a google.cloud.bigquery.table.Row + class Row(object): + def __init__(self, value): + self._value = value + + def values(self): + return self._value + + data1 = [(1, "foo")] + with mock.patch.object(BaseEngineSpec, "fetch_data", return_value=data1): + result = BigQueryEngineSpec.fetch_data(None, 0) + self.assertEqual(result, data1) + + data2 = [Row(1), Row(2)] + with mock.patch.object(BaseEngineSpec, "fetch_data", return_value=data2): + result =
BigQueryEngineSpec.fetch_data(None, 0) + self.assertEqual(result, [1, 2]) + + def test_extra_table_metadata(self): + """ + DB Eng Specs (bigquery): Test extra table metadata + """ + database = mock.Mock() + # Test no indexes + database.get_indexes = mock.MagicMock(return_value=None) + result = BigQueryEngineSpec.extra_table_metadata( + database, "some_table", "some_schema" + ) + self.assertEqual(result, {}) + + index_metadata = [ + {"name": "clustering", "column_names": ["c_col1", "c_col2", "c_col3"],}, + {"name": "partition", "column_names": ["p_col1", "p_col2", "p_col3"],}, + ] + expected_result = { + "partitions": {"cols": [["p_col1", "p_col2", "p_col3"]]}, + "clustering": {"cols": [["c_col1", "c_col2", "c_col3"]]}, + } + database.get_indexes = mock.MagicMock(return_value=index_metadata) + result = BigQueryEngineSpec.extra_table_metadata( + database, "some_table", "some_schema" + ) + self.assertEqual(result, expected_result) + + def test_df_to_sql(self): + """ + DB Eng Specs (bigquery): Test DataFrame to SQL contract + """ + # test missing google.oauth2 dependency + sys.modules["pandas_gbq"] = mock.MagicMock() + df = DataFrame() + self.assertRaisesRegexp( + Exception, + "Could not import libraries", + BigQueryEngineSpec.df_to_sql, + df, + con="some_connection", + schema="schema", + name="name", + ) + + invalid_kwargs = [ + {"name": "some_name"}, + {"schema": "some_schema"}, + {"con": "some_con"}, + {"name": "some_name", "con": "some_con"}, + {"name": "some_name", "schema": "some_schema"}, + {"con": "some_con", "schema": "some_schema"}, + ] + # Test check for missing required kwargs (name, schema, con) + sys.modules["google.oauth2"] = mock.MagicMock() + for invalid_kwarg in invalid_kwargs: + self.assertRaisesRegexp( + Exception, + "name, schema and con need to be defined in kwargs", + BigQueryEngineSpec.df_to_sql, + df, + **invalid_kwarg, + ) + + import pandas_gbq + from google.oauth2 import service_account + + pandas_gbq.to_gbq = mock.Mock() +
service_account.Credentials.from_service_account_info = mock.MagicMock( + return_value="account_info" + ) + connection = mock.Mock() + connection.engine.url.host = "google-host" + connection.dialect.credentials_info = "secrets" + + BigQueryEngineSpec.df_to_sql( + df, con=connection, schema="schema", name="name", if_exists="extra_key" + ) + + pandas_gbq.to_gbq.assert_called_with( + df, + project_id="google-host", + destination_table="schema.name", + credentials="account_info", + if_exists="extra_key", + )