aminghadersohi commented on code in PR #40494:
URL: https://github.com/apache/superset/pull/40494#discussion_r3343381742
##########
tests/integration_tests/datasets/api_tests.py:
##########
@@ -2980,6 +2980,81 @@ def test_get_or_create_dataset_creates_table(self):
with examples_db.get_sqla_engine() as engine:
engine.execute("DROP TABLE test_create_sqla_table_api")
+ def test_get_or_create_dataset_disambiguates_by_schema(self):
+ """
+ Dataset API: Regression test for #30377.
+
+ ``get_or_create`` must filter on ``schema`` as well as ``table_name``.
+ Otherwise:
+
+ - Two pre-existing datasets sharing a ``table_name`` across different
+ schemas make ``one_or_none()`` raise ``MultipleResultsFound`` → 500.
+ - A single existing dataset in schema A is wrongly returned when the
+ caller asks for the same ``table_name`` in schema B (false positive).
+ """
+ # SQLite's test schema carries a legacy single-column unique constraint
+ # on ``tables.table_name`` that contradicts the modern composite
+ # ``(database_id, catalog, schema, table_name)`` key, so inserting two
+ # datasets that share a ``table_name`` fails at INSERT regardless of
+ # schema. Postgres and MySQL behave correctly.
+ if get_main_database().backend == "sqlite":
+ return
+
+ self.login(ADMIN_USERNAME)
+ admin_id = self.get_user("admin").id
+ examples_db = get_example_database()
+ table_name = "test_get_or_create_schema_disambiguation"
+
+ # ``fetch_metadata=False`` so the helper doesn't try to introspect
+ # ``schema_a`` / ``schema_b`` against the real example DB — those
+ # schemas only need to exist as metadata rows for this test.
+ ds_schema_a = self.insert_dataset(
+ table_name,
+ [admin_id],
+ examples_db,
+ schema="schema_a",
+ fetch_metadata=False,
+ )
+ ds_schema_b = self.insert_dataset(
+ table_name,
+ [admin_id],
+ examples_db,
+ schema="schema_b",
+ fetch_metadata=False,
+ )
+
+ # Case 1: ask for an existing schema — must return that exact dataset,
+ # not raise MultipleResultsFound.
+ rv = self.client.post(
+ "api/v1/dataset/get_or_create/",
+ json={
+ "table_name": table_name,
+ "schema": "schema_a",
+ "database_id": examples_db.id,
+ },
+ )
+ assert rv.status_code == 200
+ assert json.loads(rv.data.decode("utf-8"))["result"] == {
+ "table_id": ds_schema_a.id
+ }
+
+ rv = self.client.post(
+ "api/v1/dataset/get_or_create/",
+ json={
+ "table_name": table_name,
+ "schema": "schema_b",
+ "database_id": examples_db.id,
+ },
+ )
+ assert rv.status_code == 200
+ assert json.loads(rv.data.decode("utf-8"))["result"] == {
+ "table_id": ds_schema_b.id
+ }
+
+ # Cleanup the seed rows; the "new schema" path creates an additional
+ # dataset that the test_client owns — also handed off to teardown.
+ self.items_to_delete = [ds_schema_a, ds_schema_b]
Review Comment:
`self.items_to_delete` is set after the assertions. If either `assert
rv.status_code == 200` fails (lines 3036 or 3049), `tearDown` never sees these
datasets — they leak into subsequent tests.
Move the assignment to immediately after line 3024 (`ds_schema_b = ...`),
before the first `client.post` call.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]