This is an automated email from the ASF dual-hosted git repository.
dpgaspar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git
The following commit(s) were added to refs/heads/master by this push:
new 2b59075 tests(engine_specs): full bigquery engine coverage (#9702)
2b59075 is described below
commit 2b59075d57e2bce243fdaba4cd62cbd127a9b1be
Author: Daniel Vaz Gaspar <[email protected]>
AuthorDate: Thu Apr 30 18:31:15 2020 +0100
tests(engine_specs): full bigquery engine coverage (#9702)
---
superset/db_engine_specs/bigquery.py | 17 +++--
tests/db_engine_specs/bigquery_tests.py | 123 ++++++++++++++++++++++++++++++++
2 files changed, 133 insertions(+), 7 deletions(-)
diff --git a/superset/db_engine_specs/bigquery.py b/superset/db_engine_specs/bigquery.py
index 4f5b5d1..992b5fe 100644
--- a/superset/db_engine_specs/bigquery.py
+++ b/superset/db_engine_specs/bigquery.py
@@ -26,7 +26,8 @@ from sqlalchemy.sql.expression import ColumnClause
from superset.db_engine_specs.base import BaseEngineSpec
if TYPE_CHECKING:
- from superset.models.core import Database # pylint: disable=unused-import
+ # pylint: disable=unused-import
+ from superset.models.core import Database # pragma: no cover
class BigQueryEngineSpec(BaseEngineSpec):
@@ -84,6 +85,8 @@ class BigQueryEngineSpec(BaseEngineSpec):
@classmethod
def fetch_data(cls, cursor: Any, limit: int) -> List[Tuple]:
data = super().fetch_data(cursor, limit)
+ # Support type BigQuery Row, introduced here PR #4071
+ # google.cloud.bigquery.table.Row
if data and type(data[0]).__name__ == "Row":
data = [r.values() for r in data] # type: ignore
return data
@@ -174,22 +177,22 @@ class BigQueryEngineSpec(BaseEngineSpec):
`DataFrame.to_gbq()` which requires `pandas_gbq` to be installed.
:param df: Dataframe with data to be uploaded
- :param kwargs: kwargs to be passed to to_gbq() method. Requires both `schema`
- and ``name` to be present in kwargs, which are combined and passed to
- `to_gbq()` as `destination_table`.
+ :param kwargs: kwargs to be passed to to_gbq() method. Requires that `schema`,
+ `name` and `con` are present in kwargs. `name` and `schema` are combined
+ and passed to `to_gbq()` as `destination_table`.
"""
try:
import pandas_gbq
from google.oauth2 import service_account
except ImportError:
raise Exception(
- "Could not import the library `pandas_gbq`, which is "
+ "Could not import libraries `pandas_gbq` or `google.oauth2`, which are "
"required to be installed in your environment in order "
"to upload data to BigQuery"
)
- if not ("name" in kwargs and "schema" in kwargs):
- raise Exception("name and schema need to be defined in kwargs")
+ if not ("name" in kwargs and "schema" in kwargs and "con" in kwargs):
+ raise Exception("name, schema and con need to be defined in kwargs")
gbq_kwargs = {}
gbq_kwargs["project_id"] = kwargs["con"].engine.url.host
diff --git a/tests/db_engine_specs/bigquery_tests.py b/tests/db_engine_specs/bigquery_tests.py
index c9b9878..67f7747 100644
--- a/tests/db_engine_specs/bigquery_tests.py
+++ b/tests/db_engine_specs/bigquery_tests.py
@@ -14,14 +14,22 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+import sys
+import unittest.mock as mock
+
+from pandas import DataFrame
from sqlalchemy import column
+from superset.db_engine_specs.base import BaseEngineSpec
from superset.db_engine_specs.bigquery import BigQueryEngineSpec
from tests.db_engine_specs.base_tests import DbEngineSpecTestCase
class BigQueryTestCase(DbEngineSpecTestCase):
def test_bigquery_sqla_column_label(self):
+ """
+ DB Eng Specs (bigquery): Test column label
+ """
test_cases = {
"Col": "Col",
"SUM(x)": "SUM_x__5f110",
@@ -33,11 +41,16 @@ class BigQueryTestCase(DbEngineSpecTestCase):
self.assertEqual(actual, expected)
def test_convert_dttm(self):
+ """
+ DB Eng Specs (bigquery): Test conversion to date time
+ """
dttm = self.get_dttm()
test_cases = {
"DATE": "CAST('2019-01-02' AS DATE)",
"DATETIME": "CAST('2019-01-02T03:04:05.678900' AS DATETIME)",
"TIMESTAMP": "CAST('2019-01-02T03:04:05.678900' AS TIMESTAMP)",
+ "TIME": "CAST('03:04:05.678900' AS TIME)",
+ "UNKNOWNTYPE": None,
}
for target_type, expected in test_cases.items():
@@ -45,6 +58,9 @@ class BigQueryTestCase(DbEngineSpecTestCase):
self.assertEqual(actual, expected)
def test_timegrain_expressions(self):
+ """
+ DB Eng Specs (bigquery): Test time grain expressions
+ """
col = column("temporal")
test_cases = {
"DATE": "DATE_TRUNC(temporal, HOUR)",
@@ -57,3 +73,110 @@ class BigQueryTestCase(DbEngineSpecTestCase):
col=col, pdf=None, time_grain="PT1H", type_=type_
)
self.assertEqual(str(actual), expected)
+
+ def test_fetch_data(self):
+ """
+ DB Eng Specs (bigquery): Test fetch data
+ """
+ # Mock a google.cloud.bigquery.table.Row
+ class Row(object):
+ def __init__(self, value):
+ self._value = value
+
+ def values(self):
+ return self._value
+
+ data1 = [(1, "foo")]
+ with mock.patch.object(BaseEngineSpec, "fetch_data", return_value=data1):
+ result = BigQueryEngineSpec.fetch_data(None, 0)
+ self.assertEqual(result, data1)
+
+ data2 = [Row(1), Row(2)]
+ with mock.patch.object(BaseEngineSpec, "fetch_data", return_value=data2):
+ result = BigQueryEngineSpec.fetch_data(None, 0)
+ self.assertEqual(result, [1, 2])
+
+ def test_extra_table_metadata(self):
+ """
+ DB Eng Specs (bigquery): Test extra table metadata
+ """
+ database = mock.Mock()
+ # Test no indexes
+ database.get_indexes = mock.MagicMock(return_value=None)
+ result = BigQueryEngineSpec.extra_table_metadata(
+ database, "some_table", "some_schema"
+ )
+ self.assertEqual(result, {})
+
+ index_metadata = [
+ {"name": "clustering", "column_names": ["c_col1", "c_col2", "c_col3"],},
+ {"name": "partition", "column_names": ["p_col1", "p_col2", "p_col3"],},
+ ]
+ expected_result = {
+ "partitions": {"cols": [["p_col1", "p_col2", "p_col3"]]},
+ "clustering": {"cols": [["c_col1", "c_col2", "c_col3"]]},
+ }
+ database.get_indexes = mock.MagicMock(return_value=index_metadata)
+ result = BigQueryEngineSpec.extra_table_metadata(
+ database, "some_table", "some_schema"
+ )
+ self.assertEqual(result, expected_result)
+
+ def test_df_to_sql(self):
+ """
+ DB Eng Specs (bigquery): Test DataFrame to SQL contract
+ """
+ # test missing google.oauth2 dependency
+ sys.modules["pandas_gbq"] = mock.MagicMock()
+ df = DataFrame()
+ self.assertRaisesRegexp(
+ Exception,
+ "Could not import libraries",
+ BigQueryEngineSpec.df_to_sql,
+ df,
+ con="some_connection",
+ schema="schema",
+ name="name",
+ )
+
+ invalid_kwargs = [
+ {"name": "some_name"},
+ {"schema": "some_schema"},
+ {"con": "some_con"},
+ {"name": "some_name", "con": "some_con"},
+ {"name": "some_name", "schema": "some_schema"},
+ {"con": "some_con", "schema": "some_schema"},
+ ]
+ # Test check for missing required kwargs (name, schema, con)
+ sys.modules["google.oauth2"] = mock.MagicMock()
+ for invalid_kwarg in invalid_kwargs:
+ self.assertRaisesRegexp(
+ Exception,
+ "name, schema and con need to be defined in kwargs",
+ BigQueryEngineSpec.df_to_sql,
+ df,
+ **invalid_kwarg,
+ )
+
+ import pandas_gbq
+ from google.oauth2 import service_account
+
+ pandas_gbq.to_gbq = mock.Mock()
+ service_account.Credentials.from_service_account_info = mock.MagicMock(
+ return_value="account_info"
+ )
+ connection = mock.Mock()
+ connection.engine.url.host = "google-host"
+ connection.dialect.credentials_info = "secrets"
+
+ BigQueryEngineSpec.df_to_sql(
+ df, con=connection, schema="schema", name="name", if_exists="extra_key"
+ )
+
+ pandas_gbq.to_gbq.assert_called_with(
+ df,
+ project_id="google-host",
+ destination_table="schema.name",
+ credentials="account_info",
+ if_exists="extra_key",
+ )