This is an automated email from the ASF dual-hosted git repository. beto pushed a commit to branch bigquery-catalog in repository https://gitbox.apache.org/repos/asf/superset.git
commit a652b4e711c91ff185750aef003c6f126b2a00ae Author: Beto Dealmeida <[email protected]> AuthorDate: Thu May 9 15:47:23 2024 -0400 feat: add support for catalogs --- superset/db_engine_specs/README.md | 22 +---- superset/db_engine_specs/bigquery.py | 34 +++++++- superset/db_engine_specs/snowflake.py | 28 +++++-- tests/unit_tests/db_engine_specs/test_bigquery.py | 94 ++++++++++++++++++++++ tests/unit_tests/db_engine_specs/test_snowflake.py | 88 ++++++++++++++++++++ 5 files changed, 238 insertions(+), 28 deletions(-) diff --git a/superset/db_engine_specs/README.md b/superset/db_engine_specs/README.md index 4a108be658..88362f6b02 100644 --- a/superset/db_engine_specs/README.md +++ b/superset/db_engine_specs/README.md @@ -706,29 +706,11 @@ Hive and Trino: 4. Table 5. Column -If the database supports catalogs, then the DB engine spec should have the `supports_catalog` class attribute set to true. +If the database supports catalogs, then the DB engine spec should have the `supports_catalog` class attribute set to true. It should also implement the `get_default_catalog` method, so that the proper permissions can be created when datasets are added. ### Dynamic catalog -Superset has no support for multiple catalogs. A given SQLAlchemy URI connects to a single catalog, and it's impossible to browse other catalogs, or change the catalog. This means that datasets can only be added for the main catalog of the database. For example, with this Postgres SQLAlchemy URI: - -``` -postgresql://admin:[email protected]:5432/db -``` - -Here, datasets can only be added to the `db` catalog (which Postgres calls a "database"). - -One confusing problem is that many databases allow querying across catalogs in SQL Lab. For example, with BigQuery one can write: - -```sql -SELECT * FROM project.schema.table -``` - -This means that **even though the database is configured for a given catalog (project), users can query other projects**. This is a common workaround for creating datasets in catalogs other than the catalog configured in the database: just create a virtual dataset. - -Ideally we would want users to be able to choose the catalog when using SQL Lab and when creating datasets. In order to do that, DB engine specs need to implement a method that rewrites the SQLAlchemy URI depending on the desired catalog. This method already exists, and is the same method used for dynamic schemas, `adjust_engine_params`, but currently there are no UI affordances for choosing a catalog. - -Before the UI is implemented Superset still needs to implement support for catalogs in its security manager. But in the meantime, it's possible for DB engine spec developers to support dynamic catalogs, by setting `supports_dynamic_catalog` to true and implementing `adjust_engine_params` to handle a catalog. +Superset support for multiple catalogs. Since, in general, a given SQLAlchemy URI connects only to a single catalog, it requires DB engine specs to implement the `adjust_engine_params` method to rewrite the URL to connect to a different catalog, similar to how dynamic schemas work. Additionally, DB engine specs should also implement the `get_catalog_names` method, so that users can browse the available catalogs. ### SSH tunneling diff --git a/superset/db_engine_specs/bigquery.py b/superset/db_engine_specs/bigquery.py index ca52bd51ce..4fff5ab3d7 100644 --- a/superset/db_engine_specs/bigquery.py +++ b/superset/db_engine_specs/bigquery.py @@ -35,6 +35,7 @@ from marshmallow.exceptions import ValidationError from sqlalchemy import column, types from sqlalchemy.engine.base import Engine from sqlalchemy.engine.reflection import Inspector +from sqlalchemy.engine.url import URL from sqlalchemy.sql import sqltypes from superset import sql_parse @@ -127,7 +128,7 @@ class BigQueryEngineSpec(BaseEngineSpec): # pylint: disable=too-many-public-met allows_hidden_cc_in_orderby = True - supports_catalog = False + supports_catalog = supports_dynamic_catalog = True """ https://www.python.org/dev/peps/pep-0249/#arraysize @@ -459,6 +460,24 @@ class BigQueryEngineSpec(BaseEngineSpec): # pylint: disable=too-many-public-met for statement in statements ] + @classmethod + def get_default_catalog(cls, database: Database) -> str | None: + """ + Get the default catalog. + """ + url = database.url_object + + # The SQLAlchemy driver accepts both `bigquery://project` (where the project is + # technically a host) and `bigquery:///project` (where it's a database). But + # both can be missing, and the project is inferred from the authentication + # credentials. + if project := url.host or url.database: + return project + + with database.get_sqla_engine() as engine: + client = cls._get_client(engine) + return client.project + @classmethod def get_catalog_names( cls, @@ -477,6 +496,19 @@ class BigQueryEngineSpec(BaseEngineSpec): # pylint: disable=too-many-public-met return {project.project_id for project in projects} + @classmethod + def adjust_engine_params( + cls, + uri: URL, + connect_args: dict[str, Any], + catalog: str | None = None, + schema: str | None = None, + ) -> tuple[URL, dict[str, Any]]: + if catalog: + uri = uri.set(host=catalog, database="") + + return uri, connect_args + @classmethod def get_allow_cost_estimate(cls, extra: dict[str, Any]) -> bool: return True diff --git a/superset/db_engine_specs/snowflake.py b/superset/db_engine_specs/snowflake.py index 9a82cfccae..137cc4e00e 100644 --- a/superset/db_engine_specs/snowflake.py +++ b/superset/db_engine_specs/snowflake.py @@ -85,7 +85,7 @@ class SnowflakeEngineSpec(PostgresBaseEngineSpec): sqlalchemy_uri_placeholder = "snowflake://" supports_dynamic_schema = True - supports_catalog = False + supports_catalog = supports_dynamic_catalog = True _time_grain_expressions = { None: "{col}", @@ -144,12 +144,19 @@ class SnowflakeEngineSpec(PostgresBaseEngineSpec): catalog: Optional[str] = None, schema: Optional[str] = None, ) -> tuple[URL, dict[str, Any]]: - database = uri.database - if "/" in database: - database = database.split("/")[0] - if schema: - schema = parse.quote(schema, safe="") - uri = uri.set(database=f"{database}/{schema}") + if "/" in uri.database: + current_catalog, current_schema = uri.database.split("/", 1) + else: + current_catalog, current_schema = uri.database, None + + adjusted_database = "/".join( + [ + catalog or current_catalog, + schema or current_schema or "", + ] + ).rstrip("/") + + uri = uri.set(database=adjusted_database) return uri, connect_args @@ -169,6 +176,13 @@ class SnowflakeEngineSpec(PostgresBaseEngineSpec): return parse.unquote(database.split("/")[1]) + @classmethod + def get_default_catalog(cls, database: "Database") -> Optional[str]: + """ + Return the default catalog. + """ + return database.url_object.database.split("/")[0] + @classmethod def get_catalog_names( cls, diff --git a/tests/unit_tests/db_engine_specs/test_bigquery.py b/tests/unit_tests/db_engine_specs/test_bigquery.py index 616ae66841..13eecda09c 100644 --- a/tests/unit_tests/db_engine_specs/test_bigquery.py +++ b/tests/unit_tests/db_engine_specs/test_bigquery.py @@ -24,6 +24,7 @@ from typing import Optional import pytest from pytest_mock import MockFixture from sqlalchemy import select +from sqlalchemy.engine.url import make_url from sqlalchemy.sql import sqltypes from sqlalchemy_bigquery import BigQueryDialect @@ -333,3 +334,96 @@ def test_convert_dttm( from superset.db_engine_specs.bigquery import BigQueryEngineSpec as spec assert_convert_dttm(spec, target_type, expected_result, dttm) + + +def test_get_default_catalog(mocker: MockFixture) -> None: + """ + Test that we get the default catalog from the connection URI. + """ + from superset.db_engine_specs.bigquery import BigQueryEngineSpec + from superset.models.core import Database + + mocker.patch.object(Database, "get_sqla_engine") + get_client = mocker.patch.object(BigQueryEngineSpec, "_get_client") + get_client().project = "project" + + database = Database( + database_name="my_db", + sqlalchemy_uri="bigquery://project", + ) + assert BigQueryEngineSpec.get_default_catalog(database) == "project" + + database = Database( + database_name="my_db", + sqlalchemy_uri="bigquery:///project", + ) + assert BigQueryEngineSpec.get_default_catalog(database) == "project" + + database = Database( + database_name="my_db", + sqlalchemy_uri="bigquery://", + ) + assert BigQueryEngineSpec.get_default_catalog(database) == "project" + + +def test_adjust_engine_params_catalog_as_host() -> None: + """ + Test passing a custom catalog. + + In this test, the original URI has the catalog as the host. + """ + from superset.db_engine_specs.bigquery import BigQueryEngineSpec + + url = make_url("bigquery://project") + + uri = BigQueryEngineSpec.adjust_engine_params(url, {})[0] + assert str(uri) == "bigquery://project" + + uri = BigQueryEngineSpec.adjust_engine_params( + url, + {}, + catalog="other-project", + )[0] + assert str(uri) == "bigquery://other-project/" + + +def test_adjust_engine_params_catalog_as_database() -> None: + """ + Test passing a custom catalog. + + In this test, the original URI has the catalog as the database. + """ + from superset.db_engine_specs.bigquery import BigQueryEngineSpec + + url = make_url("bigquery:///project") + + uri = BigQueryEngineSpec.adjust_engine_params(url, {})[0] + assert str(uri) == "bigquery:///project" + + uri = BigQueryEngineSpec.adjust_engine_params( + url, + {}, + catalog="other-project", + )[0] + assert str(uri) == "bigquery://other-project/" + + +def test_adjust_engine_params_no_catalog() -> None: + """ + Test passing a custom catalog. + + In this test, the original URI has no catalog. + """ + from superset.db_engine_specs.bigquery import BigQueryEngineSpec + + url = make_url("bigquery://") + + uri = BigQueryEngineSpec.adjust_engine_params(url, {})[0] + assert str(uri) == "bigquery://" + + uri = BigQueryEngineSpec.adjust_engine_params( + url, + {}, + catalog="other-project", + )[0] + assert str(uri) == "bigquery://other-project/" diff --git a/tests/unit_tests/db_engine_specs/test_snowflake.py b/tests/unit_tests/db_engine_specs/test_snowflake.py index cf2393e842..dbbd58f000 100644 --- a/tests/unit_tests/db_engine_specs/test_snowflake.py +++ b/tests/unit_tests/db_engine_specs/test_snowflake.py @@ -203,3 +203,91 @@ def test_get_schema_from_engine_params() -> None: ) is None ) + + +def test_adjust_engine_params_fully_qualified() -> None: + """ + Test the ``adjust_engine_params`` method when the URL has catalog and schema. + """ + from superset.db_engine_specs.snowflake import SnowflakeEngineSpec + + url = make_url("snowflake://user:pass@account/database_name/default") + + uri = SnowflakeEngineSpec.adjust_engine_params(url, {})[0] + assert str(uri) == "snowflake://user:pass@account/database_name/default" + + uri = SnowflakeEngineSpec.adjust_engine_params( + url, + {}, + schema="new_schema", + )[0] + assert str(uri) == "snowflake://user:pass@account/database_name/new_schema" + + uri = SnowflakeEngineSpec.adjust_engine_params( + url, + {}, + catalog="new_catalog", + )[0] + assert str(uri) == "snowflake://user:pass@account/new_catalog/default" + + uri = SnowflakeEngineSpec.adjust_engine_params( + url, + {}, + catalog="new_catalog", + schema="new_schema", + )[0] + assert str(uri) == "snowflake://user:pass@account/new_catalog/new_schema" + + +def test_adjust_engine_params_catalog_only() -> None: + """ + Test the ``adjust_engine_params`` method when the URL has only the catalog. + """ + from superset.db_engine_specs.snowflake import SnowflakeEngineSpec + + url = make_url("snowflake://user:pass@account/database_name") + + uri = SnowflakeEngineSpec.adjust_engine_params(url, {})[0] + assert str(uri) == "snowflake://user:pass@account/database_name" + + uri = SnowflakeEngineSpec.adjust_engine_params( + url, + {}, + schema="new_schema", + )[0] + assert str(uri) == "snowflake://user:pass@account/database_name/new_schema" + + uri = SnowflakeEngineSpec.adjust_engine_params( + url, + {}, + catalog="new_catalog", + )[0] + assert str(uri) == "snowflake://user:pass@account/new_catalog" + + uri = SnowflakeEngineSpec.adjust_engine_params( + url, + {}, + catalog="new_catalog", + schema="new_schema", + )[0] + assert str(uri) == "snowflake://user:pass@account/new_catalog/new_schema" + + +def test_get_default_catalog() -> None: + """ + Test the ``get_default_catalog`` method. + """ + from superset.db_engine_specs.snowflake import SnowflakeEngineSpec + from superset.models.core import Database + + database = Database( + database_name="my_db", + sqlalchemy_uri="snowflake://user:pass@account/database_name", + ) + assert SnowflakeEngineSpec.get_default_catalog(database) == "database_name" + + database = Database( + database_name="my_db", + sqlalchemy_uri="snowflake://user:pass@account/database_name/default", + ) + assert SnowflakeEngineSpec.get_default_catalog(database) == "database_name"
