This is an automated email from the ASF dual-hosted git repository.
beto pushed a commit to branch import-export-catalog-field
in repository https://gitbox.apache.org/repos/asf/superset.git
The following commit(s) were added to refs/heads/import-export-catalog-field by
this push:
new a71092aa5d fix: export/import catalogs
a71092aa5d is described below
commit a71092aa5d86009ef94b4145470337c16a5a2657
Author: Beto Dealmeida <[email protected]>
AuthorDate: Thu May 9 12:05:27 2024 -0400
fix: export/import catalogs
---
superset/connectors/sqla/models.py | 12 +++-
superset/models/sql_lab.py | 4 ++
tests/unit_tests/commands/report/base_test.py | 2 +
tests/unit_tests/connectors/sqla/models_test.py | 76 ++++++++++++++++++++++
tests/unit_tests/datasets/commands/export_test.py | 2 +
.../datasets/commands/importers/v1/import_test.py | 2 +
6 files changed, 97 insertions(+), 1 deletion(-)
diff --git a/superset/connectors/sqla/models.py
b/superset/connectors/sqla/models.py
index ca1adba0ac..8c74dfd589 100644
--- a/superset/connectors/sqla/models.py
+++ b/superset/connectors/sqla/models.py
@@ -285,6 +285,11 @@ class BaseDatasource(AuditMixinNullable,
ImportExportMixin): # pylint: disable=
"""String representing the context of the Datasource"""
return None
+ @property
+ def catalog(self) -> str | None:
+ """String representing the catalog of the Datasource (if it applies)"""
+ return None
+
@property
def schema(self) -> str | None:
"""String representing the schema of the Datasource (if it applies)"""
@@ -330,6 +335,7 @@ class BaseDatasource(AuditMixinNullable,
ImportExportMixin): # pylint: disable=
"edit_url": self.url,
"id": self.id,
"uid": self.uid,
+ "catalog": self.catalog,
"schema": self.schema or None,
"name": self.name,
"type": self.type,
@@ -384,6 +390,7 @@ class BaseDatasource(AuditMixinNullable,
ImportExportMixin): # pylint: disable=
"datasource_name": self.datasource_name,
"table_name": self.datasource_name,
"type": self.type,
+ "catalog": self.catalog,
"schema": self.schema or None,
"offset": self.offset,
"cache_timeout": self.cache_timeout,
@@ -1135,7 +1142,9 @@ class SqlaTable(
# The reason it does not physically exist is MySQL, PostgreSQL, etc. have a
# different interpretation of uniqueness when it comes to NULL which is problematic
# given the schema is optional.
- __table_args__ = (UniqueConstraint("database_id", "schema", "table_name"),)
+ __table_args__ = (
+ UniqueConstraint("database_id", "catalog", "schema", "table_name"),
+ )
table_name = Column(String(250), nullable=False)
main_dttm_col = Column(String(250))
@@ -1166,6 +1175,7 @@ class SqlaTable(
"database_id",
"offset",
"cache_timeout",
+ "catalog",
"schema",
"sql",
"params",
diff --git a/superset/models/sql_lab.py b/superset/models/sql_lab.py
index 4c06867501..41647ea43b 100644
--- a/superset/models/sql_lab.py
+++ b/superset/models/sql_lab.py
@@ -169,6 +169,7 @@ class Query(
"limitingFactor": self.limiting_factor,
"progress": self.progress,
"rows": self.rows,
+ "catalog": self.catalog,
"schema": self.schema,
"ctas": self.select_as_cta,
"serverId": self.id,
@@ -251,6 +252,7 @@ class Query(
"owners": self.owners_data,
"database": {"id": self.database_id, "backend":
self.database.backend},
"order_by_choices": order_by_choices,
+ "catalog": self.catalog,
"schema": self.schema,
"verbose_map": {},
}
@@ -415,6 +417,7 @@ class SavedQuery(
export_parent = "database"
export_fields = [
+ "catalog",
"schema",
"label",
"description",
@@ -557,6 +560,7 @@ class TableSchema(AuditMixinNullable, ExtraJSONMixin,
Model):
"id": self.id,
"tab_state_id": self.tab_state_id,
"database_id": self.database_id,
+ "catalog": self.catalog,
"schema": self.schema,
"table": self.table,
"description": description,
diff --git a/tests/unit_tests/commands/report/base_test.py
b/tests/unit_tests/commands/report/base_test.py
index 499682a1e6..871f3a511b 100644
--- a/tests/unit_tests/commands/report/base_test.py
+++ b/tests/unit_tests/commands/report/base_test.py
@@ -15,6 +15,8 @@
# specific language governing permissions and limitations
# under the License.
+from __future__ import annotations
+
import logging
from datetime import timedelta
from functools import wraps
diff --git a/tests/unit_tests/connectors/sqla/models_test.py
b/tests/unit_tests/connectors/sqla/models_test.py
index 687295baf8..c1e06f3755 100644
--- a/tests/unit_tests/connectors/sqla/models_test.py
+++ b/tests/unit_tests/connectors/sqla/models_test.py
@@ -18,10 +18,14 @@
import pytest
from pytest_mock import MockerFixture
from sqlalchemy import create_engine
+from sqlalchemy.exc import IntegrityError
+from sqlalchemy.orm.session import Session
from superset.connectors.sqla.models import SqlaTable
+from superset.daos.dataset import DatasetDAO
from superset.exceptions import OAuth2RedirectError
from superset.models.core import Database
+from superset.sql_parse import Table
from superset.superset_typing import QueryObjectDict
@@ -187,3 +191,75 @@ def
test_query_datasources_by_permissions_with_catalog_schema(
"tables.schema_perm IN ('[my_db].[db1].[schema1]',
'[my_other_db].[schema]') OR "
"tables.catalog_perm IN ('[my_db].[db1]')"
)
+
+
+def test_dataset_uniqueness(session: Session) -> None:
+ """
+ Test dataset uniqueness constraints.
+ """
+ Database.metadata.create_all(session.bind)
+
+ database = Database(database_name="my_db", sqlalchemy_uri="sqlite://")
+
+ # add prod.schema.table
+ dataset = SqlaTable(
+ database=database,
+ catalog="prod",
+ schema="schema",
+ table_name="table",
+ )
+ session.add(dataset)
+ session.commit()
+
+ # add dev.schema.table
+ dataset = SqlaTable(
+ database=database,
+ catalog="dev",
+ schema="schema",
+ table_name="table",
+ )
+ session.add(dataset)
+ session.commit()
+
+ # try to add dev.schema.table again, fails
+ dataset = SqlaTable(
+ database=database,
+ catalog="dev",
+ schema="schema",
+ table_name="table",
+ )
+ session.add(dataset)
+ with pytest.raises(IntegrityError):
+ session.commit()
+ session.rollback()
+
+ # add schema.table
+ dataset = SqlaTable(
+ database=database,
+ catalog=None,
+ schema="schema",
+ table_name="table",
+ )
+ session.add(dataset)
+ session.commit()
+
+ # add schema.table again, works because in SQL `NULL != NULL`
+ dataset = SqlaTable(
+ database=database,
+ catalog=None,
+ schema="schema",
+ table_name="table",
+ )
+ session.add(dataset)
+ session.commit()
+
+ # but the DAO enforces application logic for uniqueness
+ assert not DatasetDAO.validate_uniqueness(
+ database.id,
+ Table("table", "schema", None),
+ )
+
+ assert DatasetDAO.validate_uniqueness(
+ database.id,
+ Table("table", "schema", "some_catalog"),
+ )
diff --git a/tests/unit_tests/datasets/commands/export_test.py
b/tests/unit_tests/datasets/commands/export_test.py
index fbfa8d346c..9104e5b76e 100644
--- a/tests/unit_tests/datasets/commands/export_test.py
+++ b/tests/unit_tests/datasets/commands/export_test.py
@@ -68,6 +68,7 @@ def test_export(session: Session) -> None:
description="This is the description",
is_featured=1,
cache_timeout=3600,
+ catalog="public",
schema="my_schema",
sql=None,
params=json.dumps(
@@ -111,6 +112,7 @@ description: This is the description
default_endpoint: null
offset: -8
cache_timeout: 3600
+catalog: public
schema: my_schema
sql: null
params:
diff --git a/tests/unit_tests/datasets/commands/importers/v1/import_test.py
b/tests/unit_tests/datasets/commands/importers/v1/import_test.py
index 511b60188a..6c306007d3 100644
--- a/tests/unit_tests/datasets/commands/importers/v1/import_test.py
+++ b/tests/unit_tests/datasets/commands/importers/v1/import_test.py
@@ -61,6 +61,7 @@ def test_import_dataset(mocker: MockFixture, session:
Session) -> None:
"default_endpoint": None,
"offset": -8,
"cache_timeout": 3600,
+ "catalog": "public",
"schema": "my_schema",
"sql": None,
"params": {
@@ -115,6 +116,7 @@ def test_import_dataset(mocker: MockFixture, session:
Session) -> None:
assert sqla_table.default_endpoint is None
assert sqla_table.offset == -8
assert sqla_table.cache_timeout == 3600
+ assert sqla_table.catalog == "public"
assert sqla_table.schema == "my_schema"
assert sqla_table.sql is None
assert sqla_table.params == json.dumps(