This is an automated email from the ASF dual-hosted git repository. beto pushed a commit to branch bigquery_get_catalog_names in repository https://gitbox.apache.org/repos/asf/superset.git
commit 56618246b48340a8db8868d9e22c0669a2852f31 Author: Beto Dealmeida <[email protected]> AuthorDate: Wed Mar 22 15:33:13 2023 -0700 feat(bigquery): get_catalog_names --- superset/db_engine_specs/bigquery.py | 83 ++++++++++++++++++++++-------------- 1 file changed, 51 insertions(+), 32 deletions(-) diff --git a/superset/db_engine_specs/bigquery.py b/superset/db_engine_specs/bigquery.py index 171dad4732..26e3df0a9c 100644 --- a/superset/db_engine_specs/bigquery.py +++ b/superset/db_engine_specs/bigquery.py @@ -28,6 +28,7 @@ from marshmallow import fields, Schema from marshmallow.exceptions import ValidationError from sqlalchemy import column, types from sqlalchemy.engine.base import Engine +from sqlalchemy.engine.reflection import Inspector from sqlalchemy.sql import sqltypes from typing_extensions import TypedDict @@ -42,6 +43,17 @@ from superset.sql_parse import Table from superset.utils import core as utils from superset.utils.hashing import md5_sha_from_str +try: + import pandas_gbq + from google.cloud import bigquery + from google.oauth2 import service_account + + Client = bigquery.Client +except ModuleNotFoundError: + bigquery = None + pandas_gbq = None + Client = None # for type checking + if TYPE_CHECKING: from superset.models.core import Database # pragma: no cover @@ -327,17 +339,10 @@ class BigQueryEngineSpec(BaseEngineSpec): :param df: The dataframe with data to be uploaded :param to_sql_kwargs: The kwargs to be passed to pandas.DataFrame.to_sql` method """ - - try: - # pylint: disable=import-outside-toplevel - import pandas_gbq - from google.oauth2 import service_account - except ImportError as ex: + if pandas_gbq is None or service_account is None: raise Exception( - "Could not import libraries `pandas_gbq` or `google.oauth2`, which are " - "required to be installed in your environment in order " - "to upload data to BigQuery" - ) from ex + "Could not import libraries needed to upload data to BigQuery." + ) if not table.schema: raise Exception("The table schema must be defined") @@ -366,6 +371,19 @@ class BigQueryEngineSpec(BaseEngineSpec): pandas_gbq.to_gbq(df, **to_gbq_kwargs) + @classmethod + def _get_client(cls, engine: Engine) -> Client: + """ + Return the BigQuery client associated with an engine. + """ + if bigquery is None or service_account is None: + raise Exception("Could not import libraries needed to connect to BigQuery.") + + credentials = service_account.Credentials.from_service_account_info( + engine.dialect.credentials_info + ) + return bigquery.Client(credentials=credentials) + @classmethod def estimate_query_cost( cls, @@ -395,35 +413,36 @@ class BigQueryEngineSpec(BaseEngineSpec): costs.append(cls.estimate_statement_cost(processed_statement, database)) return costs + @classmethod + def get_catalog_names( # pylint: disable=unused-argument + cls, + database: "Database", + inspector: Inspector, + ) -> List[str]: + """ + Get all catalogs. + + In BigQuery, a catalog is called a "project". + """ + with database.get_sqla_engine_with_context() as engine: + client = cls._get_client(engine) + projects = client.list_projects() + + return sorted(project.project_id for project in projects) + @classmethod def get_allow_cost_estimate(cls, extra: Dict[str, Any]) -> bool: return True @classmethod def estimate_statement_cost(cls, statement: str, cursor: Any) -> Dict[str, Any]: - try: - # pylint: disable=import-outside-toplevel - # It's the only way to perfom a dry-run estimate cost - from google.cloud import bigquery - from google.oauth2 import service_account - except ImportError as ex: - raise Exception( - "Could not import libraries `pygibquery` or `google.oauth2`, which are " - "required to be installed in your environment in order " - "to upload data to BigQuery" - ) from ex - with cls.get_engine(cursor) as engine: - creds = engine.dialect.credentials_info - - creds = service_account.Credentials.from_service_account_info(creds) - client = bigquery.Client(credentials=creds) - job_config = bigquery.QueryJobConfig(dry_run=True) - - query_job = client.query( - statement, - job_config=job_config, - ) # Make an API request. + client = cls._get_client(engine) + job_config = bigquery.QueryJobConfig(dry_run=True) + query_job = client.query( + statement, + job_config=job_config, + ) # Make an API request. # Format Bytes. # TODO: Humanize in case more db engine specs need to be added,
