This is an automated email from the ASF dual-hosted git repository.

beto pushed a commit to branch bigquery-catalog
in repository https://gitbox.apache.org/repos/asf/superset.git

commit a652b4e711c91ff185750aef003c6f126b2a00ae
Author: Beto Dealmeida <[email protected]>
AuthorDate: Thu May 9 15:47:23 2024 -0400

    feat: add support for catalogs
---
 superset/db_engine_specs/README.md                 | 22 +----
 superset/db_engine_specs/bigquery.py               | 34 +++++++-
 superset/db_engine_specs/snowflake.py              | 28 +++++--
 tests/unit_tests/db_engine_specs/test_bigquery.py  | 94 ++++++++++++++++++++++
 tests/unit_tests/db_engine_specs/test_snowflake.py | 88 ++++++++++++++++++++
 5 files changed, 238 insertions(+), 28 deletions(-)

diff --git a/superset/db_engine_specs/README.md 
b/superset/db_engine_specs/README.md
index 4a108be658..88362f6b02 100644
--- a/superset/db_engine_specs/README.md
+++ b/superset/db_engine_specs/README.md
@@ -706,29 +706,11 @@ Hive and Trino:
 4. Table
 5. Column
 
-If the database supports catalogs, then the DB engine spec should have the 
`supports_catalog` class attribute set to true.
+If the database supports catalogs, then the DB engine spec should have the 
`supports_catalog` class attribute set to true. It should also implement the 
`get_default_catalog` method, so that the proper permissions can be created 
when datasets are added.
 
 ### Dynamic catalog
 
-Superset has no support for multiple catalogs. A given SQLAlchemy URI connects 
to a single catalog, and it's impossible to browse other catalogs, or change 
the catalog. This means that datasets can only be added for the main catalog of 
the database. For example, with this Postgres SQLAlchemy URI:
-
-```
-postgresql://admin:[email protected]:5432/db
-```
-
-Here, datasets can only be added to the `db` catalog (which Postgres calls a 
"database").
-
-One confusing problem is that many databases allow querying across catalogs in 
SQL Lab. For example, with BigQuery one can write:
-
-```sql
-SELECT * FROM project.schema.table
-```
-
-This means that **even though the database is configured for a given catalog 
(project), users can query other projects**. This is a common workaround for 
creating datasets in catalogs other than the catalog configured in the 
database: just create a virtual dataset.
-
-Ideally we would want users to be able to choose the catalog when using SQL 
Lab and when creating datasets. In order to do that, DB engine specs need to 
implement a method that rewrites the SQLAlchemy URI depending on the desired 
catalog. This method already exists, and is the same method used for dynamic 
schemas, `adjust_engine_params`, but currently there are no UI affordances for 
choosing a catalog.
-
-Before the UI is implemented Superset still needs to implement support for 
catalogs in its security manager. But in the meantime, it's possible for DB 
engine spec developers to support dynamic catalogs, by setting 
`supports_dynamic_catalog` to true and implementing `adjust_engine_params` to 
handle a catalog.
+Superset support for multiple catalogs. Since, in general, a given SQLAlchemy 
URI connects only to a single catalog, it requires DB engine specs to implement 
the `adjust_engine_params` method to rewrite the URL to connect to a different 
catalog, similar to how dynamic schemas work. Additionally, DB engine specs 
should also implement the `get_catalog_names` method, so that users can browse 
the available catalogs.
 
 ### SSH tunneling
 
diff --git a/superset/db_engine_specs/bigquery.py 
b/superset/db_engine_specs/bigquery.py
index ca52bd51ce..4fff5ab3d7 100644
--- a/superset/db_engine_specs/bigquery.py
+++ b/superset/db_engine_specs/bigquery.py
@@ -35,6 +35,7 @@ from marshmallow.exceptions import ValidationError
 from sqlalchemy import column, types
 from sqlalchemy.engine.base import Engine
 from sqlalchemy.engine.reflection import Inspector
+from sqlalchemy.engine.url import URL
 from sqlalchemy.sql import sqltypes
 
 from superset import sql_parse
@@ -127,7 +128,7 @@ class BigQueryEngineSpec(BaseEngineSpec):  # pylint: 
disable=too-many-public-met
 
     allows_hidden_cc_in_orderby = True
 
-    supports_catalog = False
+    supports_catalog = supports_dynamic_catalog = True
 
     """
     https://www.python.org/dev/peps/pep-0249/#arraysize
@@ -459,6 +460,24 @@ class BigQueryEngineSpec(BaseEngineSpec):  # pylint: 
disable=too-many-public-met
                 for statement in statements
             ]
 
+    @classmethod
+    def get_default_catalog(cls, database: Database) -> str | None:
+        """
+        Get the default catalog.
+        """
+        url = database.url_object
+
+        # The SQLAlchemy driver accepts both `bigquery://project` (where the 
project is
+        # technically a host) and `bigquery:///project` (where it's a 
database). But
+        # both can be missing, and the project is inferred from the 
authentication
+        # credentials.
+        if project := url.host or url.database:
+            return project
+
+        with database.get_sqla_engine() as engine:
+            client = cls._get_client(engine)
+            return client.project
+
     @classmethod
     def get_catalog_names(
         cls,
@@ -477,6 +496,19 @@ class BigQueryEngineSpec(BaseEngineSpec):  # pylint: 
disable=too-many-public-met
 
         return {project.project_id for project in projects}
 
+    @classmethod
+    def adjust_engine_params(
+        cls,
+        uri: URL,
+        connect_args: dict[str, Any],
+        catalog: str | None = None,
+        schema: str | None = None,
+    ) -> tuple[URL, dict[str, Any]]:
+        if catalog:
+            uri = uri.set(host=catalog, database="")
+
+        return uri, connect_args
+
     @classmethod
     def get_allow_cost_estimate(cls, extra: dict[str, Any]) -> bool:
         return True
diff --git a/superset/db_engine_specs/snowflake.py 
b/superset/db_engine_specs/snowflake.py
index 9a82cfccae..137cc4e00e 100644
--- a/superset/db_engine_specs/snowflake.py
+++ b/superset/db_engine_specs/snowflake.py
@@ -85,7 +85,7 @@ class SnowflakeEngineSpec(PostgresBaseEngineSpec):
     sqlalchemy_uri_placeholder = "snowflake://"
 
     supports_dynamic_schema = True
-    supports_catalog = False
+    supports_catalog = supports_dynamic_catalog = True
 
     _time_grain_expressions = {
         None: "{col}",
@@ -144,12 +144,19 @@ class SnowflakeEngineSpec(PostgresBaseEngineSpec):
         catalog: Optional[str] = None,
         schema: Optional[str] = None,
     ) -> tuple[URL, dict[str, Any]]:
-        database = uri.database
-        if "/" in database:
-            database = database.split("/")[0]
-        if schema:
-            schema = parse.quote(schema, safe="")
-            uri = uri.set(database=f"{database}/{schema}")
+        if "/" in uri.database:
+            current_catalog, current_schema = uri.database.split("/", 1)
+        else:
+            current_catalog, current_schema = uri.database, None
+
+        adjusted_database = "/".join(
+            [
+                catalog or current_catalog,
+                schema or current_schema or "",
+            ]
+        ).rstrip("/")
+
+        uri = uri.set(database=adjusted_database)
 
         return uri, connect_args
 
@@ -169,6 +176,13 @@ class SnowflakeEngineSpec(PostgresBaseEngineSpec):
 
         return parse.unquote(database.split("/")[1])
 
+    @classmethod
+    def get_default_catalog(cls, database: "Database") -> Optional[str]:
+        """
+        Return the default catalog.
+        """
+        return database.url_object.database.split("/")[0]
+
     @classmethod
     def get_catalog_names(
         cls,
diff --git a/tests/unit_tests/db_engine_specs/test_bigquery.py 
b/tests/unit_tests/db_engine_specs/test_bigquery.py
index 616ae66841..13eecda09c 100644
--- a/tests/unit_tests/db_engine_specs/test_bigquery.py
+++ b/tests/unit_tests/db_engine_specs/test_bigquery.py
@@ -24,6 +24,7 @@ from typing import Optional
 import pytest
 from pytest_mock import MockFixture
 from sqlalchemy import select
+from sqlalchemy.engine.url import make_url
 from sqlalchemy.sql import sqltypes
 from sqlalchemy_bigquery import BigQueryDialect
 
@@ -333,3 +334,96 @@ def test_convert_dttm(
     from superset.db_engine_specs.bigquery import BigQueryEngineSpec as spec
 
     assert_convert_dttm(spec, target_type, expected_result, dttm)
+
+
+def test_get_default_catalog(mocker: MockFixture) -> None:
+    """
+    Test that we get the default catalog from the connection URI.
+    """
+    from superset.db_engine_specs.bigquery import BigQueryEngineSpec
+    from superset.models.core import Database
+
+    mocker.patch.object(Database, "get_sqla_engine")
+    get_client = mocker.patch.object(BigQueryEngineSpec, "_get_client")
+    get_client().project = "project"
+
+    database = Database(
+        database_name="my_db",
+        sqlalchemy_uri="bigquery://project",
+    )
+    assert BigQueryEngineSpec.get_default_catalog(database) == "project"
+
+    database = Database(
+        database_name="my_db",
+        sqlalchemy_uri="bigquery:///project",
+    )
+    assert BigQueryEngineSpec.get_default_catalog(database) == "project"
+
+    database = Database(
+        database_name="my_db",
+        sqlalchemy_uri="bigquery://",
+    )
+    assert BigQueryEngineSpec.get_default_catalog(database) == "project"
+
+
+def test_adjust_engine_params_catalog_as_host() -> None:
+    """
+    Test passing a custom catalog.
+
+    In this test, the original URI has the catalog as the host.
+    """
+    from superset.db_engine_specs.bigquery import BigQueryEngineSpec
+
+    url = make_url("bigquery://project")
+
+    uri = BigQueryEngineSpec.adjust_engine_params(url, {})[0]
+    assert str(uri) == "bigquery://project"
+
+    uri = BigQueryEngineSpec.adjust_engine_params(
+        url,
+        {},
+        catalog="other-project",
+    )[0]
+    assert str(uri) == "bigquery://other-project/"
+
+
+def test_adjust_engine_params_catalog_as_database() -> None:
+    """
+    Test passing a custom catalog.
+
+    In this test, the original URI has the catalog as the database.
+    """
+    from superset.db_engine_specs.bigquery import BigQueryEngineSpec
+
+    url = make_url("bigquery:///project")
+
+    uri = BigQueryEngineSpec.adjust_engine_params(url, {})[0]
+    assert str(uri) == "bigquery:///project"
+
+    uri = BigQueryEngineSpec.adjust_engine_params(
+        url,
+        {},
+        catalog="other-project",
+    )[0]
+    assert str(uri) == "bigquery://other-project/"
+
+
+def test_adjust_engine_params_no_catalog() -> None:
+    """
+    Test passing a custom catalog.
+
+    In this test, the original URI has no catalog.
+    """
+    from superset.db_engine_specs.bigquery import BigQueryEngineSpec
+
+    url = make_url("bigquery://")
+
+    uri = BigQueryEngineSpec.adjust_engine_params(url, {})[0]
+    assert str(uri) == "bigquery://"
+
+    uri = BigQueryEngineSpec.adjust_engine_params(
+        url,
+        {},
+        catalog="other-project",
+    )[0]
+    assert str(uri) == "bigquery://other-project/"
diff --git a/tests/unit_tests/db_engine_specs/test_snowflake.py 
b/tests/unit_tests/db_engine_specs/test_snowflake.py
index cf2393e842..dbbd58f000 100644
--- a/tests/unit_tests/db_engine_specs/test_snowflake.py
+++ b/tests/unit_tests/db_engine_specs/test_snowflake.py
@@ -203,3 +203,91 @@ def test_get_schema_from_engine_params() -> None:
         )
         is None
     )
+
+
+def test_adjust_engine_params_fully_qualified() -> None:
+    """
+    Test the ``adjust_engine_params`` method when the URL has catalog and 
schema.
+    """
+    from superset.db_engine_specs.snowflake import SnowflakeEngineSpec
+
+    url = make_url("snowflake://user:pass@account/database_name/default")
+
+    uri = SnowflakeEngineSpec.adjust_engine_params(url, {})[0]
+    assert str(uri) == "snowflake://user:pass@account/database_name/default"
+
+    uri = SnowflakeEngineSpec.adjust_engine_params(
+        url,
+        {},
+        schema="new_schema",
+    )[0]
+    assert str(uri) == "snowflake://user:pass@account/database_name/new_schema"
+
+    uri = SnowflakeEngineSpec.adjust_engine_params(
+        url,
+        {},
+        catalog="new_catalog",
+    )[0]
+    assert str(uri) == "snowflake://user:pass@account/new_catalog/default"
+
+    uri = SnowflakeEngineSpec.adjust_engine_params(
+        url,
+        {},
+        catalog="new_catalog",
+        schema="new_schema",
+    )[0]
+    assert str(uri) == "snowflake://user:pass@account/new_catalog/new_schema"
+
+
+def test_adjust_engine_params_catalog_only() -> None:
+    """
+    Test the ``adjust_engine_params`` method when the URL has only the catalog.
+    """
+    from superset.db_engine_specs.snowflake import SnowflakeEngineSpec
+
+    url = make_url("snowflake://user:pass@account/database_name")
+
+    uri = SnowflakeEngineSpec.adjust_engine_params(url, {})[0]
+    assert str(uri) == "snowflake://user:pass@account/database_name"
+
+    uri = SnowflakeEngineSpec.adjust_engine_params(
+        url,
+        {},
+        schema="new_schema",
+    )[0]
+    assert str(uri) == "snowflake://user:pass@account/database_name/new_schema"
+
+    uri = SnowflakeEngineSpec.adjust_engine_params(
+        url,
+        {},
+        catalog="new_catalog",
+    )[0]
+    assert str(uri) == "snowflake://user:pass@account/new_catalog"
+
+    uri = SnowflakeEngineSpec.adjust_engine_params(
+        url,
+        {},
+        catalog="new_catalog",
+        schema="new_schema",
+    )[0]
+    assert str(uri) == "snowflake://user:pass@account/new_catalog/new_schema"
+
+
+def test_get_default_catalog() -> None:
+    """
+    Test the ``get_default_catalog`` method.
+    """
+    from superset.db_engine_specs.snowflake import SnowflakeEngineSpec
+    from superset.models.core import Database
+
+    database = Database(
+        database_name="my_db",
+        sqlalchemy_uri="snowflake://user:pass@account/database_name",
+    )
+    assert SnowflakeEngineSpec.get_default_catalog(database) == "database_name"
+
+    database = Database(
+        database_name="my_db",
+        sqlalchemy_uri="snowflake://user:pass@account/database_name/default",
+    )
+    assert SnowflakeEngineSpec.get_default_catalog(database) == "database_name"

Reply via email to