codeant-ai-for-open-source[bot] commented on code in PR #40130: URL: https://github.com/apache/superset/pull/40130#discussion_r3397364969
########## tests/integration_tests/datasets/soft_delete_tests.py: ########## @@ -0,0 +1,454 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Integration tests for dataset soft-delete and restore.""" + +from datetime import datetime + +from superset import security_manager +from superset.connectors.sqla.models import SqlaTable +from superset.constants import SKIP_VISIBILITY_FILTER_CLASSES +from superset.extensions import db +from superset.models.core import Database +from superset.models.slice import Slice +from superset.utils import json +from tests.integration_tests.base_tests import SupersetTestCase +from tests.integration_tests.constants import ( + ADMIN_USERNAME, + ALPHA_USERNAME, + GAMMA_USERNAME, +) + + +def _restore_dataset(dataset_id: int) -> None: + """Restore a soft-deleted dataset (cleanup helper). + + Module-level so every test class in this file can use it. Used in + ``finally`` blocks so a failed assertion can't strand a soft-deleted + row and leak it into later tests; re-queries with the + visibility-filter bypass and only restores if still soft-deleted. + """ + row = ( + db.session.query(SqlaTable) + .execution_options(**{SKIP_VISIBILITY_FILTER_CLASSES: {SqlaTable}}) + .filter(SqlaTable.id == dataset_id) + .one_or_none() + ) + if row is not None and row.deleted_at is not None: + row.restore() + db.session.commit() + + +class TestDatasetSoftDelete(SupersetTestCase): + """Tests for dataset soft-delete behaviour (T015, T018).""" + + def _get_example_dataset_id(self) -> int: + """Get an existing example dataset ID for testing.""" + dataset = db.session.query(SqlaTable).first() + assert dataset is not None, "No datasets found — load examples first" + return dataset.id + + def _restore_dataset(self, dataset_id: int) -> None: + """Class-method shim over the module-level helper (existing call sites).""" + _restore_dataset(dataset_id) + + def test_delete_dataset_soft_deletes(self) -> None: + """DELETE should set deleted_at instead of removing the row.""" + dataset_id = self._get_example_dataset_id() + self.login(ADMIN_USERNAME) + + try: + rv = self.client.delete(f"/api/v1/dataset/{dataset_id}") + assert rv.status_code == 200 + + row = ( + db.session.query(SqlaTable) + .execution_options(**{SKIP_VISIBILITY_FILTER_CLASSES: {SqlaTable}}) + .filter(SqlaTable.id == dataset_id) + .one_or_none() + ) + assert row is not None + assert row.deleted_at is not None + finally: + self._restore_dataset(dataset_id) + + def test_soft_deleted_dataset_excluded_from_list(self) -> None: + """GET /api/v1/dataset/ should not include soft-deleted datasets.""" + dataset_id = self._get_example_dataset_id() + self.login(ADMIN_USERNAME) + + try: + rv = self.client.delete(f"/api/v1/dataset/{dataset_id}") + assert rv.status_code == 200 + + rv = self.client.get("/api/v1/dataset/") + data = json.loads(rv.data) + ids = [d["id"] for d in data["result"]] + assert dataset_id not in ids + finally: + self._restore_dataset(dataset_id) + + def test_soft_deleted_dataset_included_in_list_when_requested(self) -> None: + """GET /api/v1/dataset/ with dataset_deleted_state=include returns deleted datasets.""" # noqa: E501 + dataset_id = self._get_example_dataset_id() + self.login(ADMIN_USERNAME) + + try: + rv = self.client.delete(f"/api/v1/dataset/{dataset_id}") + assert rv.status_code == 200 + + rison_query = ( + "(filters:!((col:id,opr:dataset_deleted_state,value:include)))" + ) + rv = self.client.get(f"/api/v1/dataset/?q={rison_query}") + assert rv.status_code == 200 + + data = json.loads(rv.data) + deleted_row = next( + (row for row in data["result"] if row["id"] == dataset_id), + None, + ) + assert deleted_row is not None + assert deleted_row["deleted_at"] is not None + finally: + self._restore_dataset(dataset_id) + + def test_only_filter_returns_only_soft_deleted_datasets(self) -> None: + """dataset_deleted_state=only excludes live rows and returns only deleted ones.""" # noqa: E501 + ids = [row.id for row in db.session.query(SqlaTable).limit(2).all()] + assert len(ids) >= 2, "Need at least two example datasets for this test" + live_id, deleted_id = ids[0], ids[1] + self.login(ADMIN_USERNAME) + + try: + rv = self.client.delete(f"/api/v1/dataset/{deleted_id}") + assert rv.status_code == 200 + + rison_query = "(filters:!((col:id,opr:dataset_deleted_state,value:only)))" + rv = self.client.get(f"/api/v1/dataset/?q={rison_query}") + assert rv.status_code == 200 + + data = json.loads(rv.data) + returned_ids = {row["id"] for row in data["result"]} + assert deleted_id in returned_ids + assert live_id not in returned_ids + finally: + self._restore_dataset(deleted_id) + + def _hard_delete_created(self, dataset_id: int, database: Database) -> None: + """Remove a test-created dataset + its database (visibility bypassed).""" + row = ( + db.session.query(SqlaTable) + .execution_options(**{SKIP_VISIBILITY_FILTER_CLASSES: {SqlaTable}}) + .filter(SqlaTable.id == dataset_id) + .one_or_none() + ) + if row: + db.session.delete(row) + db.session.delete(database) + db.session.commit() + + def test_deleted_state_list_shows_owner_their_own_deleted(self) -> None: + """A non-admin owner can still enumerate their own soft-deleted datasets. + Deleted-state scoping mirrors the restore audience, so it must not lock + owners out of their own trash.""" + alpha = self.get_user(ALPHA_USERNAME) + database = Database(database_name="sd_owner_db", sqlalchemy_uri="sqlite://") + db.session.add(database) + db.session.flush() + dataset = SqlaTable( + table_name="sd_owner_tbl", database=database, owners=[alpha] + ) + db.session.add(dataset) + db.session.commit() + dataset_id = dataset.id + + dataset.deleted_at = datetime(2026, 1, 1, 12, 0, 0) + db.session.commit() + + self.login(ALPHA_USERNAME) + rison_query = ( + "(filters:!((col:id,opr:dataset_deleted_state,value:only)),page_size:200)" + ) + rv = self.client.get(f"/api/v1/dataset/?q={rison_query}") + assert rv.status_code == 200 + ids = [r["id"] for r in json.loads(rv.data)["result"]] + assert dataset_id in ids + + self._hard_delete_created(dataset_id, database) + Review Comment: **Suggestion:** This test creates a database and dataset but does not wrap cleanup in a `finally` block. If any assertion fails before the last line, the created rows remain in the shared test DB and can make later tests fail unpredictably. Move `_hard_delete_created(...)` into a `finally` block so teardown always runs. [missing cleanup] <details> <summary><b>Severity Level:</b> Major ⚠️</summary> ```mdx - ⚠️ Failed test leaves extra database and dataset in fixtures. - ⚠️ Later integration tests run against polluted dataset state. ``` </details> <details> <summary><b>Steps of Reproduction ✅ </b></summary> ```mdx 1. Run `TestDatasetSoftDelete.test_deleted_state_list_shows_owner_their_own_deleted` in `tests/integration_tests/datasets/soft_delete_tests.py:164-193`. 2. The test creates a `Database` named `"sd_owner_db"` and a `SqlaTable` `"sd_owner_tbl"` owned by Alpha, then marks it soft-deleted by setting `dataset.deleted_at` and committing (lines 20-31 in the same file). 3. The test logs in as `ALPHA_USERNAME` and calls `GET /api/v1/dataset/?q=...dataset_deleted_state=value:only` (lines 33-37), then asserts a 200 status and that `dataset_id` appears in the result (lines 38-40). If the deleted-state filter logic regresses so the dataset is missing or the status is not 200, one of these assertions raises, aborting the test before line 42. 4. Because `_hard_delete_created(dataset_id, database)` (line 42) is not in a `try/finally`, the failure path leaves both the `sd_owner_db` `Database` row and its soft-deleted `SqlaTable` row in the shared SQLAlchemy session for the remainder of the test run, instead of reliably cleaning them up as `_hard_delete_created` is designed to do (lines 2-13). ``` </details> [Fix in Cursor](https://app.codeant.ai/fix-in-ide?tool=cursor&prompt_id=0ac16df63fb5469f9c5d9332e8c6da5e&service=github&base_url=https%3A%2F%2Fgithub.com&org=apache&repo=apache%2Fsuperset) | [Fix in VSCode Claude](https://app.codeant.ai/fix-in-ide?tool=vscode-claude&prompt_id=0ac16df63fb5469f9c5d9332e8c6da5e&service=github&base_url=https%3A%2F%2Fgithub.com&org=apache&repo=apache%2Fsuperset) *(Use Cmd/Ctrl + Click for best experience)* <details> <summary><b>Prompt for AI Agent 🤖 </b></summary> ```mdx This is a comment left during a code review. **Path:** tests/integration_tests/datasets/soft_delete_tests.py **Line:** 183:192 **Comment:** *Missing Cleanup: This test creates a database and dataset but does not wrap cleanup in a `finally` block. If any assertion fails before the last line, the created rows remain in the shared test DB and can make later tests fail unpredictably. Move `_hard_delete_created(...)` into a `finally` block so teardown always runs. Validate the correctness of the flagged issue. If correct, How can I resolve this? If you propose a fix, implement it and please make it concise. Once fix is implemented, also check other comments on the same PR, and ask user if the user wants to fix the rest of the comments as well. if said yes, then fetch all the comments validate the correctness and implement a minimal fix ``` </details> <a href='https://app.codeant.ai/feedback?pr_url=https%3A%2F%2Fgithub.com%2Fapache%2Fsuperset%2Fpull%2F40130&comment_hash=c912078090c7cc244183d674e9937c22f934b68b29f7581b8cefc1677ec178b2&reaction=like'>👍</a> | <a href='https://app.codeant.ai/feedback?pr_url=https%3A%2F%2Fgithub.com%2Fapache%2Fsuperset%2Fpull%2F40130&comment_hash=c912078090c7cc244183d674e9937c22f934b68b29f7581b8cefc1677ec178b2&reaction=dislike'>👎</a> ########## tests/integration_tests/datasets/soft_delete_tests.py: ########## @@ -0,0 +1,454 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Integration tests for dataset soft-delete and restore.""" + +from datetime import datetime + +from superset import security_manager +from superset.connectors.sqla.models import SqlaTable +from superset.constants import SKIP_VISIBILITY_FILTER_CLASSES +from superset.extensions import db +from superset.models.core import Database +from superset.models.slice import Slice +from superset.utils import json +from tests.integration_tests.base_tests import SupersetTestCase +from tests.integration_tests.constants import ( + ADMIN_USERNAME, + ALPHA_USERNAME, + GAMMA_USERNAME, +) + + +def _restore_dataset(dataset_id: int) -> None: + """Restore a soft-deleted dataset (cleanup helper). + + Module-level so every test class in this file can use it. Used in + ``finally`` blocks so a failed assertion can't strand a soft-deleted + row and leak it into later tests; re-queries with the + visibility-filter bypass and only restores if still soft-deleted. + """ + row = ( + db.session.query(SqlaTable) + .execution_options(**{SKIP_VISIBILITY_FILTER_CLASSES: {SqlaTable}}) + .filter(SqlaTable.id == dataset_id) + .one_or_none() + ) + if row is not None and row.deleted_at is not None: + row.restore() + db.session.commit() + + +class TestDatasetSoftDelete(SupersetTestCase): + """Tests for dataset soft-delete behaviour (T015, T018).""" + + def _get_example_dataset_id(self) -> int: + """Get an existing example dataset ID for testing.""" + dataset = db.session.query(SqlaTable).first() + assert dataset is not None, "No datasets found — load examples first" + return dataset.id + + def _restore_dataset(self, dataset_id: int) -> None: + """Class-method shim over the module-level helper (existing call sites).""" + _restore_dataset(dataset_id) + + def test_delete_dataset_soft_deletes(self) -> None: + """DELETE should set deleted_at instead of removing the row.""" + dataset_id = self._get_example_dataset_id() + self.login(ADMIN_USERNAME) + + try: + rv = self.client.delete(f"/api/v1/dataset/{dataset_id}") + assert rv.status_code == 200 + + row = ( + db.session.query(SqlaTable) + .execution_options(**{SKIP_VISIBILITY_FILTER_CLASSES: {SqlaTable}}) + .filter(SqlaTable.id == dataset_id) + .one_or_none() + ) + assert row is not None + assert row.deleted_at is not None + finally: + self._restore_dataset(dataset_id) + + def test_soft_deleted_dataset_excluded_from_list(self) -> None: + """GET /api/v1/dataset/ should not include soft-deleted datasets.""" + dataset_id = self._get_example_dataset_id() + self.login(ADMIN_USERNAME) + + try: + rv = self.client.delete(f"/api/v1/dataset/{dataset_id}") + assert rv.status_code == 200 + + rv = self.client.get("/api/v1/dataset/") + data = json.loads(rv.data) + ids = [d["id"] for d in data["result"]] + assert dataset_id not in ids + finally: + self._restore_dataset(dataset_id) + + def test_soft_deleted_dataset_included_in_list_when_requested(self) -> None: + """GET /api/v1/dataset/ with dataset_deleted_state=include returns deleted datasets.""" # noqa: E501 + dataset_id = self._get_example_dataset_id() + self.login(ADMIN_USERNAME) + + try: + rv = self.client.delete(f"/api/v1/dataset/{dataset_id}") + assert rv.status_code == 200 + + rison_query = ( + "(filters:!((col:id,opr:dataset_deleted_state,value:include)))" + ) + rv = self.client.get(f"/api/v1/dataset/?q={rison_query}") + assert rv.status_code == 200 + + data = json.loads(rv.data) + deleted_row = next( + (row for row in data["result"] if row["id"] == dataset_id), + None, + ) + assert deleted_row is not None + assert deleted_row["deleted_at"] is not None + finally: + self._restore_dataset(dataset_id) + + def test_only_filter_returns_only_soft_deleted_datasets(self) -> None: + """dataset_deleted_state=only excludes live rows and returns only deleted ones.""" # noqa: E501 + ids = [row.id for row in db.session.query(SqlaTable).limit(2).all()] + assert len(ids) >= 2, "Need at least two example datasets for this test" + live_id, deleted_id = ids[0], ids[1] + self.login(ADMIN_USERNAME) + + try: + rv = self.client.delete(f"/api/v1/dataset/{deleted_id}") + assert rv.status_code == 200 + + rison_query = "(filters:!((col:id,opr:dataset_deleted_state,value:only)))" + rv = self.client.get(f"/api/v1/dataset/?q={rison_query}") + assert rv.status_code == 200 + + data = json.loads(rv.data) + returned_ids = {row["id"] for row in data["result"]} + assert deleted_id in returned_ids + assert live_id not in returned_ids + finally: + self._restore_dataset(deleted_id) + + def _hard_delete_created(self, dataset_id: int, database: Database) -> None: + """Remove a test-created dataset + its database (visibility bypassed).""" + row = ( + db.session.query(SqlaTable) + .execution_options(**{SKIP_VISIBILITY_FILTER_CLASSES: {SqlaTable}}) + .filter(SqlaTable.id == dataset_id) + .one_or_none() + ) + if row: + db.session.delete(row) + db.session.delete(database) + db.session.commit() + + def test_deleted_state_list_shows_owner_their_own_deleted(self) -> None: + """A non-admin owner can still enumerate their own soft-deleted datasets. + Deleted-state scoping mirrors the restore audience, so it must not lock + owners out of their own trash.""" + alpha = self.get_user(ALPHA_USERNAME) + database = Database(database_name="sd_owner_db", sqlalchemy_uri="sqlite://") + db.session.add(database) + db.session.flush() + dataset = SqlaTable( + table_name="sd_owner_tbl", database=database, owners=[alpha] + ) + db.session.add(dataset) + db.session.commit() + dataset_id = dataset.id + + dataset.deleted_at = datetime(2026, 1, 1, 12, 0, 0) + db.session.commit() + + self.login(ALPHA_USERNAME) + rison_query = ( + "(filters:!((col:id,opr:dataset_deleted_state,value:only)),page_size:200)" + ) + rv = self.client.get(f"/api/v1/dataset/?q={rison_query}") + assert rv.status_code == 200 + ids = [r["id"] for r in json.loads(rv.data)["result"]] + assert dataset_id in ids + + self._hard_delete_created(dataset_id, database) + + def test_deleted_state_list_hides_non_owned_from_read_access_user(self) -> None: + """A read-access non-owner must not enumerate a dataset once it is + soft-deleted. + + Gamma is granted ``datasource_access`` to the dataset, so + ``DatasourceFilter`` makes it visible to gamma while live. After + soft-delete, the deleted-state list is scoped to the restore audience + (owners/admins), so gamma — who could never restore it — must not see it + via ``include`` or ``only``. + """ + admin = self.get_user(ADMIN_USERNAME) + database = Database(database_name="sd_acl_db", sqlalchemy_uri="sqlite://") + db.session.add(database) + db.session.flush() + dataset = SqlaTable(table_name="sd_acl_tbl", database=database, owners=[admin]) + db.session.add(dataset) + db.session.commit() + dataset_id = dataset.id + + gamma_role = security_manager.find_role("Gamma") + pvm = security_manager.add_permission_view_menu( + "datasource_access", dataset.perm + ) + gamma_role.permissions.append(pvm) + db.session.commit() + + try: + # Precondition: gamma can see the dataset while it is live. + self.login(GAMMA_USERNAME) + rv = self.client.get("/api/v1/dataset/?q=(page_size:200)") + assert dataset_id in [r["id"] for r in json.loads(rv.data)["result"]], ( + "precondition: gamma should see the live dataset via datasource access" + ) + + reloaded = ( + db.session.query(SqlaTable) + .execution_options(**{SKIP_VISIBILITY_FILTER_CLASSES: {SqlaTable}}) + .filter(SqlaTable.id == dataset_id) + .one() + ) + reloaded.deleted_at = datetime(2026, 1, 1, 12, 0, 0) + db.session.commit() + + for value in ("include", "only"): + rison_query = ( + f"(filters:!((col:id,opr:dataset_deleted_state,value:{value}))," + "page_size:200)" + ) + rv = self.client.get(f"/api/v1/dataset/?q={rison_query}") + assert rv.status_code == 200 + ids = [r["id"] for r in json.loads(rv.data)["result"]] + assert dataset_id not in ids, ( + "read-access non-owner must not enumerate a soft-deleted " + f"dataset via dataset_deleted_state={value}" + ) + finally: + pvm = security_manager.find_permission_view_menu( + "datasource_access", dataset.perm + ) + if pvm: + security_manager.del_permission_role(gamma_role, pvm) + db.session.commit() + self._hard_delete_created(dataset_id, database) + + def test_no_cascade_to_dependent_charts(self) -> None: + """Soft-deleting a dataset should NOT cascade to its charts (FR-009, T018).""" + dataset_id = self._get_example_dataset_id() + self.login(ADMIN_USERNAME) + + # Find charts that depend on this dataset + dependent_charts = ( + db.session.query(Slice) + .filter(Slice.datasource_id == dataset_id, Slice.datasource_type == "table") + .all() + ) + dependent_chart_ids = [c.id for c in dependent_charts] + + try: + # Soft-delete the dataset + rv = self.client.delete(f"/api/v1/dataset/{dataset_id}") + assert rv.status_code == 200 + + # Dependent charts should still be active (no cascade). On this + # branch ``Slice`` does not yet carry ``deleted_at`` (added by the + # charts soft-delete PR), so we only verify the row is still + # loadable through the default visibility-filtered query — which + # would return None if the chart had been soft-deleted. + for chart_id in dependent_chart_ids: + chart = ( + db.session.query(Slice).filter(Slice.id == chart_id).one_or_none() + ) + assert chart is not None, f"Chart {chart_id} should still be active" + finally: + self._restore_dataset(dataset_id) + + +class TestDatasetRestore(SupersetTestCase): + """Tests for dataset restore behaviour (T027).""" + + def _get_example_dataset(self) -> SqlaTable: + dataset = db.session.query(SqlaTable).first() + assert dataset is not None + return dataset + + def test_restore_soft_deleted_dataset(self) -> None: + """POST /api/v1/dataset/<uuid>/restore should make it visible again.""" + dataset = self._get_example_dataset() + dataset_id = dataset.id + dataset_uuid = str(dataset.uuid) + self.login(ADMIN_USERNAME) + + self.client.delete(f"/api/v1/dataset/{dataset_id}") + + rv = self.client.post(f"/api/v1/dataset/{dataset_uuid}/restore") + assert rv.status_code == 200 + + rv = self.client.get(f"/api/v1/dataset/{dataset_id}") + assert rv.status_code == 200 Review Comment: **Suggestion:** This flow soft-deletes a shared example dataset but has no `finally` cleanup; if restore/get assertions fail, the dataset stays deleted and can break subsequent integration tests that depend on example data. Add guaranteed cleanup with `_restore_dataset(dataset_id)` in a `finally` block. [missing cleanup] <details> <summary><b>Severity Level:</b> Major ⚠️</summary> ```mdx - ⚠️ Restore-happy-path test can strand example dataset soft-deleted. - ⚠️ Downstream tests depending on examples may unexpectedly fail. ``` </details> <details> <summary><b>Steps of Reproduction ✅ </b></summary> ```mdx 1. Run `TestDatasetRestore.test_restore_soft_deleted_dataset` in `tests/integration_tests/datasets/soft_delete_tests.py:69-79` (around lines 299-310 in the PR hunk). 2. The test calls `_get_example_dataset()` (lines 64-67), which does `db.session.query(SqlaTable).first()` and returns a shared example dataset, then logs in as `ADMIN_USERNAME` (lines 71-74). 3. It soft-deletes that shared dataset with `DELETE /api/v1/dataset/<id>` (line 76), posts to `/api/v1/dataset/<uuid>/restore` and asserts a 200 status (lines 78-79), then does a `GET /api/v1/dataset/<id>` and asserts 200 again (lines 2-3 of the subsequent chunk). If a regression causes either the restore POST or the GET to return a non-200 status, one of these assertions fails. 4. Because there is no `try/finally` around the `DELETE` and no explicit cleanup helper in this test (unlike `test_no_cascade_to_dependent_charts`, which restores in a `finally` at lines 42-58), a failing assertion leaves the shared example dataset soft-deleted in the database for the rest of the test session. ``` </details> [Fix in Cursor](https://app.codeant.ai/fix-in-ide?tool=cursor&prompt_id=45ea6954a24a4b4d901d0cc908f62577&service=github&base_url=https%3A%2F%2Fgithub.com&org=apache&repo=apache%2Fsuperset) | [Fix in VSCode Claude](https://app.codeant.ai/fix-in-ide?tool=vscode-claude&prompt_id=45ea6954a24a4b4d901d0cc908f62577&service=github&base_url=https%3A%2F%2Fgithub.com&org=apache&repo=apache%2Fsuperset) *(Use Cmd/Ctrl + Click for best experience)* <details> <summary><b>Prompt for AI Agent 🤖 </b></summary> ```mdx This is a comment left during a code review. **Path:** tests/integration_tests/datasets/soft_delete_tests.py **Line:** 304:310 **Comment:** *Missing Cleanup: This flow soft-deletes a shared example dataset but has no `finally` cleanup; if restore/get assertions fail, the dataset stays deleted and can break subsequent integration tests that depend on example data. Add guaranteed cleanup with `_restore_dataset(dataset_id)` in a `finally` block. Validate the correctness of the flagged issue. If correct, How can I resolve this? If you propose a fix, implement it and please make it concise. Once fix is implemented, also check other comments on the same PR, and ask user if the user wants to fix the rest of the comments as well. if said yes, then fetch all the comments validate the correctness and implement a minimal fix ``` </details> <a href='https://app.codeant.ai/feedback?pr_url=https%3A%2F%2Fgithub.com%2Fapache%2Fsuperset%2Fpull%2F40130&comment_hash=bc2871e68b9f956ea0fe0ebe1ef93ea4315e3b1469e1cc95fedd5783cb69fbbc&reaction=like'>👍</a> | <a href='https://app.codeant.ai/feedback?pr_url=https%3A%2F%2Fgithub.com%2Fapache%2Fsuperset%2Fpull%2F40130&comment_hash=bc2871e68b9f956ea0fe0ebe1ef93ea4315e3b1469e1cc95fedd5783cb69fbbc&reaction=dislike'>👎</a> ########## tests/integration_tests/datasets/soft_delete_tests.py: ########## @@ -0,0 +1,454 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Integration tests for dataset soft-delete and restore.""" + +from datetime import datetime + +from superset import security_manager +from superset.connectors.sqla.models import SqlaTable +from superset.constants import SKIP_VISIBILITY_FILTER_CLASSES +from superset.extensions import db +from superset.models.core import Database +from superset.models.slice import Slice +from superset.utils import json +from tests.integration_tests.base_tests import SupersetTestCase +from tests.integration_tests.constants import ( + ADMIN_USERNAME, + ALPHA_USERNAME, + GAMMA_USERNAME, +) + + +def _restore_dataset(dataset_id: int) -> None: + """Restore a soft-deleted dataset (cleanup helper). + + Module-level so every test class in this file can use it. Used in + ``finally`` blocks so a failed assertion can't strand a soft-deleted + row and leak it into later tests; re-queries with the + visibility-filter bypass and only restores if still soft-deleted. + """ + row = ( + db.session.query(SqlaTable) + .execution_options(**{SKIP_VISIBILITY_FILTER_CLASSES: {SqlaTable}}) + .filter(SqlaTable.id == dataset_id) + .one_or_none() + ) + if row is not None and row.deleted_at is not None: + row.restore() + db.session.commit() + + +class TestDatasetSoftDelete(SupersetTestCase): + """Tests for dataset soft-delete behaviour (T015, T018).""" + + def _get_example_dataset_id(self) -> int: + """Get an existing example dataset ID for testing.""" + dataset = db.session.query(SqlaTable).first() + assert dataset is not None, "No datasets found — load examples first" + return dataset.id + + def _restore_dataset(self, dataset_id: int) -> None: + """Class-method shim over the module-level helper (existing call sites).""" + _restore_dataset(dataset_id) + + def test_delete_dataset_soft_deletes(self) -> None: + """DELETE should set deleted_at instead of removing the row.""" + dataset_id = self._get_example_dataset_id() + self.login(ADMIN_USERNAME) + + try: + rv = self.client.delete(f"/api/v1/dataset/{dataset_id}") + assert rv.status_code == 200 + + row = ( + db.session.query(SqlaTable) + .execution_options(**{SKIP_VISIBILITY_FILTER_CLASSES: {SqlaTable}}) + .filter(SqlaTable.id == dataset_id) + .one_or_none() + ) + assert row is not None + assert row.deleted_at is not None + finally: + self._restore_dataset(dataset_id) + + def test_soft_deleted_dataset_excluded_from_list(self) -> None: + """GET /api/v1/dataset/ should not include soft-deleted datasets.""" + dataset_id = self._get_example_dataset_id() + self.login(ADMIN_USERNAME) + + try: + rv = self.client.delete(f"/api/v1/dataset/{dataset_id}") + assert rv.status_code == 200 + + rv = self.client.get("/api/v1/dataset/") + data = json.loads(rv.data) + ids = [d["id"] for d in data["result"]] + assert dataset_id not in ids + finally: + self._restore_dataset(dataset_id) + + def test_soft_deleted_dataset_included_in_list_when_requested(self) -> None: + """GET /api/v1/dataset/ with dataset_deleted_state=include returns deleted datasets.""" # noqa: E501 + dataset_id = self._get_example_dataset_id() + self.login(ADMIN_USERNAME) + + try: + rv = self.client.delete(f"/api/v1/dataset/{dataset_id}") + assert rv.status_code == 200 + + rison_query = ( + "(filters:!((col:id,opr:dataset_deleted_state,value:include)))" + ) + rv = self.client.get(f"/api/v1/dataset/?q={rison_query}") + assert rv.status_code == 200 + + data = json.loads(rv.data) + deleted_row = next( + (row for row in data["result"] if row["id"] == dataset_id), + None, + ) + assert deleted_row is not None + assert deleted_row["deleted_at"] is not None + finally: + self._restore_dataset(dataset_id) + + def test_only_filter_returns_only_soft_deleted_datasets(self) -> None: + """dataset_deleted_state=only excludes live rows and returns only deleted ones.""" # noqa: E501 + ids = [row.id for row in db.session.query(SqlaTable).limit(2).all()] + assert len(ids) >= 2, "Need at least two example datasets for this test" + live_id, deleted_id = ids[0], ids[1] + self.login(ADMIN_USERNAME) + + try: + rv = self.client.delete(f"/api/v1/dataset/{deleted_id}") + assert rv.status_code == 200 + + rison_query = "(filters:!((col:id,opr:dataset_deleted_state,value:only)))" + rv = self.client.get(f"/api/v1/dataset/?q={rison_query}") + assert rv.status_code == 200 + + data = json.loads(rv.data) + returned_ids = {row["id"] for row in data["result"]} + assert deleted_id in returned_ids + assert live_id not in returned_ids + finally: + self._restore_dataset(deleted_id) + + def _hard_delete_created(self, dataset_id: int, database: Database) -> None: + """Remove a test-created dataset + its database (visibility bypassed).""" + row = ( + db.session.query(SqlaTable) + .execution_options(**{SKIP_VISIBILITY_FILTER_CLASSES: {SqlaTable}}) + .filter(SqlaTable.id == dataset_id) + .one_or_none() + ) + if row: + db.session.delete(row) + db.session.delete(database) + db.session.commit() + + def test_deleted_state_list_shows_owner_their_own_deleted(self) -> None: + """A non-admin owner can still enumerate their own soft-deleted datasets. + Deleted-state scoping mirrors the restore audience, so it must not lock + owners out of their own trash.""" + alpha = self.get_user(ALPHA_USERNAME) + database = Database(database_name="sd_owner_db", sqlalchemy_uri="sqlite://") + db.session.add(database) + db.session.flush() + dataset = SqlaTable( + table_name="sd_owner_tbl", database=database, owners=[alpha] + ) + db.session.add(dataset) + db.session.commit() + dataset_id = dataset.id + + dataset.deleted_at = datetime(2026, 1, 1, 12, 0, 0) + db.session.commit() + + self.login(ALPHA_USERNAME) + rison_query = ( + "(filters:!((col:id,opr:dataset_deleted_state,value:only)),page_size:200)" + ) + rv = self.client.get(f"/api/v1/dataset/?q={rison_query}") + assert rv.status_code == 200 + ids = [r["id"] for r in json.loads(rv.data)["result"]] + assert dataset_id in ids + + self._hard_delete_created(dataset_id, database) + + def test_deleted_state_list_hides_non_owned_from_read_access_user(self) -> None: + """A read-access non-owner must not enumerate a dataset once it is + soft-deleted. + + Gamma is granted ``datasource_access`` to the dataset, so + ``DatasourceFilter`` makes it visible to gamma while live. After + soft-delete, the deleted-state list is scoped to the restore audience + (owners/admins), so gamma — who could never restore it — must not see it + via ``include`` or ``only``. + """ + admin = self.get_user(ADMIN_USERNAME) + database = Database(database_name="sd_acl_db", sqlalchemy_uri="sqlite://") + db.session.add(database) + db.session.flush() + dataset = SqlaTable(table_name="sd_acl_tbl", database=database, owners=[admin]) + db.session.add(dataset) + db.session.commit() + dataset_id = dataset.id + + gamma_role = security_manager.find_role("Gamma") + pvm = security_manager.add_permission_view_menu( + "datasource_access", dataset.perm + ) + gamma_role.permissions.append(pvm) + db.session.commit() + + try: + # Precondition: gamma can see the dataset while it is live. + self.login(GAMMA_USERNAME) + rv = self.client.get("/api/v1/dataset/?q=(page_size:200)") + assert dataset_id in [r["id"] for r in json.loads(rv.data)["result"]], ( + "precondition: gamma should see the live dataset via datasource access" + ) + + reloaded = ( + db.session.query(SqlaTable) + .execution_options(**{SKIP_VISIBILITY_FILTER_CLASSES: {SqlaTable}}) + .filter(SqlaTable.id == dataset_id) + .one() + ) + reloaded.deleted_at = datetime(2026, 1, 1, 12, 0, 0) + db.session.commit() + + for value in ("include", "only"): + rison_query = ( + f"(filters:!((col:id,opr:dataset_deleted_state,value:{value}))," + "page_size:200)" + ) + rv = self.client.get(f"/api/v1/dataset/?q={rison_query}") + assert rv.status_code == 200 + ids = [r["id"] for r in json.loads(rv.data)["result"]] + assert dataset_id not in ids, ( + "read-access non-owner must not enumerate a soft-deleted " + f"dataset via dataset_deleted_state={value}" + ) + finally: + pvm = security_manager.find_permission_view_menu( + "datasource_access", dataset.perm + ) + if pvm: + security_manager.del_permission_role(gamma_role, pvm) + db.session.commit() + self._hard_delete_created(dataset_id, database) + + def test_no_cascade_to_dependent_charts(self) -> None: + """Soft-deleting a dataset should NOT cascade to its charts (FR-009, T018).""" + dataset_id = self._get_example_dataset_id() + self.login(ADMIN_USERNAME) + + # Find charts that depend on this dataset + dependent_charts = ( + db.session.query(Slice) + .filter(Slice.datasource_id == dataset_id, Slice.datasource_type == "table") + .all() + ) + dependent_chart_ids = [c.id for c in dependent_charts] + + try: + # Soft-delete the dataset + rv = self.client.delete(f"/api/v1/dataset/{dataset_id}") + assert rv.status_code == 200 + + # Dependent charts should still be active (no cascade). On this + # branch ``Slice`` does not yet carry ``deleted_at`` (added by the + # charts soft-delete PR), so we only verify the row is still + # loadable through the default visibility-filtered query — which + # would return None if the chart had been soft-deleted. + for chart_id in dependent_chart_ids: + chart = ( + db.session.query(Slice).filter(Slice.id == chart_id).one_or_none() + ) + assert chart is not None, f"Chart {chart_id} should still be active" + finally: + self._restore_dataset(dataset_id) + + +class TestDatasetRestore(SupersetTestCase): + """Tests for dataset restore behaviour (T027).""" + + def _get_example_dataset(self) -> SqlaTable: + dataset = db.session.query(SqlaTable).first() + assert dataset is not None + return dataset + + def test_restore_soft_deleted_dataset(self) -> None: + """POST /api/v1/dataset/<uuid>/restore should make it visible again.""" + dataset = self._get_example_dataset() + dataset_id = dataset.id + dataset_uuid = str(dataset.uuid) + self.login(ADMIN_USERNAME) + + self.client.delete(f"/api/v1/dataset/{dataset_id}") + + rv = self.client.post(f"/api/v1/dataset/{dataset_uuid}/restore") + assert rv.status_code == 200 + + rv = self.client.get(f"/api/v1/dataset/{dataset_id}") + assert rv.status_code == 200 + + def test_restore_failure_returns_422(self) -> None: + """A failure during restore surfaces as a clean 422 via the + ``DatasetRestoreFailedError`` handler rather than an unhandled 500. + + ``RestoreDatasetCommand.run`` wraps the restore in ``@transaction`` + and rethrows ``DatasetRestoreFailedError`` on any underlying + SQLAlchemy error; this pins that the endpoint maps it to 422. + """ + from unittest.mock import patch + + from superset.commands.dataset.exceptions import ( + DatasetRestoreFailedError, + ) + + dataset = self._get_example_dataset() + dataset_id = dataset.id + dataset_uuid = str(dataset.uuid) + self.login(ADMIN_USERNAME) + self.client.delete(f"/api/v1/dataset/{dataset_id}") + + with patch( + "superset.commands.dataset.restore.RestoreDatasetCommand.run", + side_effect=DatasetRestoreFailedError(), + ): + rv = self.client.post(f"/api/v1/dataset/{dataset_uuid}/restore") + assert rv.status_code == 422 + + # Cleanup: the mocked restore left the example dataset soft-deleted. Review Comment: **Suggestion:** Cleanup runs only after the status assertion, so if the endpoint returns a non-422 response the example dataset remains soft-deleted and contaminates later tests. Put `_restore_dataset(dataset_id)` in a `finally` block to guarantee restoration on failure paths. [missing cleanup] <details> <summary><b>Severity Level:</b> Major ⚠️</summary> ```mdx - ⚠️ Failed restore-422 test leaves example dataset soft-deleted. - ⚠️ Subsequent tests may misbehave due to missing dataset. ``` </details> <details> <summary><b>Steps of Reproduction ✅ </b></summary> ```mdx 1. Run `TestDatasetRestore.test_restore_failure_returns_422` in `tests/integration_tests/datasets/soft_delete_tests.py:5-34` (around lines 305-339 in the PR hunk). 2. The test obtains a shared example dataset via `_get_example_dataset()` (lines 64-67), records its `id` and `uuid`, logs in as `ADMIN_USERNAME`, and soft-deletes it with `DELETE /api/v1/dataset/<id>` (line 23). 3. It patches `RestoreDatasetCommand.run` to raise `DatasetRestoreFailedError` and posts to `/api/v1/dataset/<uuid>/restore` (lines 25-29), then asserts `rv.status_code == 422` (line 30). If the endpoint incorrectly maps `DatasetRestoreFailedError` (e.g., returns 500 or 200 instead of 422), this assertion fails and raises before executing any further lines. 4. Because the cleanup `_restore_dataset(dataset_id)` (line 33) is placed after the assertion and not inside a `finally` block, the failing assertion leaves the example dataset soft-deleted in the database, so later tests in `TestDatasetRestore` that rely on `_get_example_dataset()` and on example datasets being live now operate against mutated state. ``` </details> [Fix in Cursor](https://app.codeant.ai/fix-in-ide?tool=cursor&prompt_id=79bfd15fdd714a5fab0de5a512d1893a&service=github&base_url=https%3A%2F%2Fgithub.com&org=apache&repo=apache%2Fsuperset) | [Fix in VSCode Claude](https://app.codeant.ai/fix-in-ide?tool=vscode-claude&prompt_id=79bfd15fdd714a5fab0de5a512d1893a&service=github&base_url=https%3A%2F%2Fgithub.com&org=apache&repo=apache%2Fsuperset) *(Use Cmd/Ctrl + Click for best experience)* <details> <summary><b>Prompt for AI Agent 🤖 </b></summary> ```mdx This is a comment left during a code review. **Path:** tests/integration_tests/datasets/soft_delete_tests.py **Line:** 331:339 **Comment:** *Missing Cleanup: Cleanup runs only after the status assertion, so if the endpoint returns a non-422 response the example dataset remains soft-deleted and contaminates later tests. Put `_restore_dataset(dataset_id)` in a `finally` block to guarantee restoration on failure paths. Validate the correctness of the flagged issue. If correct, How can I resolve this? If you propose a fix, implement it and please make it concise. Once fix is implemented, also check other comments on the same PR, and ask user if the user wants to fix the rest of the comments as well. if said yes, then fetch all the comments validate the correctness and implement a minimal fix ``` </details> <a href='https://app.codeant.ai/feedback?pr_url=https%3A%2F%2Fgithub.com%2Fapache%2Fsuperset%2Fpull%2F40130&comment_hash=4fb5dd859d416a3cd9fdfd9088a31cb47454a404e42eb9f7c125c58680172ecb&reaction=like'>👍</a> | <a href='https://app.codeant.ai/feedback?pr_url=https%3A%2F%2Fgithub.com%2Fapache%2Fsuperset%2Fpull%2F40130&comment_hash=4fb5dd859d416a3cd9fdfd9088a31cb47454a404e42eb9f7c125c58680172ecb&reaction=dislike'>👎</a> -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
