bito-code-review[bot] commented on code in PR #40130:
URL: https://github.com/apache/superset/pull/40130#discussion_r3398716271


##########
tests/integration_tests/datasets/soft_delete_tests.py:
##########
@@ -0,0 +1,462 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Integration tests for dataset soft-delete and restore."""
+
+from datetime import datetime
+
+from superset import security_manager
+from superset.connectors.sqla.models import SqlaTable
+from superset.constants import SKIP_VISIBILITY_FILTER_CLASSES
+from superset.extensions import db
+from superset.models.core import Database
+from superset.models.slice import Slice
+from superset.utils import json
+from tests.integration_tests.base_tests import SupersetTestCase
+from tests.integration_tests.constants import (
+    ADMIN_USERNAME,
+    ALPHA_USERNAME,
+    GAMMA_USERNAME,
+)
+
+
+def _restore_dataset(dataset_id: int) -> None:
+    """Restore a soft-deleted dataset (cleanup helper).
+
+    Module-level so every test class in this file can use it. Used in
+    ``finally`` blocks so a failed assertion can't strand a soft-deleted
+    row and leak it into later tests; re-queries with the
+    visibility-filter bypass and only restores if still soft-deleted.
+    """
+    row = (
+        db.session.query(SqlaTable)
+        .execution_options(**{SKIP_VISIBILITY_FILTER_CLASSES: {SqlaTable}})
+        .filter(SqlaTable.id == dataset_id)
+        .one_or_none()
+    )
+    if row is not None and row.deleted_at is not None:
+        row.restore()
+        db.session.commit()
+
+
+class TestDatasetSoftDelete(SupersetTestCase):
+    """Tests for dataset soft-delete behaviour (T015, T018)."""
+
+    def _get_example_dataset_id(self) -> int:
+        """Get an existing example dataset ID for testing."""
+        dataset = db.session.query(SqlaTable).first()
+        assert dataset is not None, "No datasets found — load examples first"
+        return dataset.id
+
+    def _restore_dataset(self, dataset_id: int) -> None:
+        """Class-method shim over the module-level helper (existing call 
sites)."""
+        _restore_dataset(dataset_id)
+
+    def test_delete_dataset_soft_deletes(self) -> None:
+        """DELETE should set deleted_at instead of removing the row."""
+        dataset_id = self._get_example_dataset_id()
+        self.login(ADMIN_USERNAME)
+
+        try:
+            rv = self.client.delete(f"/api/v1/dataset/{dataset_id}")
+            assert rv.status_code == 200
+
+            row = (
+                db.session.query(SqlaTable)
+                .execution_options(**{SKIP_VISIBILITY_FILTER_CLASSES: 
{SqlaTable}})
+                .filter(SqlaTable.id == dataset_id)
+                .one_or_none()
+            )
+            assert row is not None
+            assert row.deleted_at is not None
+        finally:
+            self._restore_dataset(dataset_id)
+
+    def test_soft_deleted_dataset_excluded_from_list(self) -> None:
+        """GET /api/v1/dataset/ should not include soft-deleted datasets."""
+        dataset_id = self._get_example_dataset_id()
+        self.login(ADMIN_USERNAME)
+
+        try:
+            rv = self.client.delete(f"/api/v1/dataset/{dataset_id}")
+            assert rv.status_code == 200
+
+            rv = self.client.get("/api/v1/dataset/")
+            data = json.loads(rv.data)
+            ids = [d["id"] for d in data["result"]]
+            assert dataset_id not in ids
+        finally:
+            self._restore_dataset(dataset_id)
+
+    def test_soft_deleted_dataset_included_in_list_when_requested(self) -> 
None:
+        """GET /api/v1/dataset/ with dataset_deleted_state=include returns 
deleted datasets."""  # noqa: E501
+        dataset_id = self._get_example_dataset_id()
+        self.login(ADMIN_USERNAME)
+
+        try:
+            rv = self.client.delete(f"/api/v1/dataset/{dataset_id}")
+            assert rv.status_code == 200
+
+            rison_query = (
+                "(filters:!((col:id,opr:dataset_deleted_state,value:include)))"
+            )
+            rv = self.client.get(f"/api/v1/dataset/?q={rison_query}")
+            assert rv.status_code == 200
+
+            data = json.loads(rv.data)
+            deleted_row = next(
+                (row for row in data["result"] if row["id"] == dataset_id),
+                None,
+            )
+            assert deleted_row is not None
+            assert deleted_row["deleted_at"] is not None
+        finally:
+            self._restore_dataset(dataset_id)
+
+    def test_only_filter_returns_only_soft_deleted_datasets(self) -> None:
+        """dataset_deleted_state=only excludes live rows and returns only 
deleted ones."""  # noqa: E501
+        ids = [row.id for row in db.session.query(SqlaTable).limit(2).all()]
+        assert len(ids) >= 2, "Need at least two example datasets for this 
test"
+        live_id, deleted_id = ids[0], ids[1]
+        self.login(ADMIN_USERNAME)
+
+        try:
+            rv = self.client.delete(f"/api/v1/dataset/{deleted_id}")
+            assert rv.status_code == 200
+
+            rison_query = 
"(filters:!((col:id,opr:dataset_deleted_state,value:only)))"
+            rv = self.client.get(f"/api/v1/dataset/?q={rison_query}")
+            assert rv.status_code == 200
+
+            data = json.loads(rv.data)
+            returned_ids = {row["id"] for row in data["result"]}
+            assert deleted_id in returned_ids
+            assert live_id not in returned_ids
+        finally:
+            self._restore_dataset(deleted_id)
+
+    def _hard_delete_created(self, dataset_id: int, database: Database) -> 
None:
+        """Remove a test-created dataset + its database (visibility 
bypassed)."""
+        row = (
+            db.session.query(SqlaTable)
+            .execution_options(**{SKIP_VISIBILITY_FILTER_CLASSES: {SqlaTable}})
+            .filter(SqlaTable.id == dataset_id)
+            .one_or_none()
+        )
+        if row:
+            db.session.delete(row)
+        db.session.delete(database)
+        db.session.commit()
+
+    def test_deleted_state_list_shows_owner_their_own_deleted(self) -> None:
+        """A non-admin owner can still enumerate their own soft-deleted 
datasets.
+        Deleted-state scoping mirrors the restore audience, so it must not lock
+        owners out of their own trash."""
+        alpha = self.get_user(ALPHA_USERNAME)
+        database = Database(database_name="sd_owner_db", 
sqlalchemy_uri="sqlite://")
+        db.session.add(database)
+        db.session.flush()
+        dataset = SqlaTable(
+            table_name="sd_owner_tbl", database=database, owners=[alpha]
+        )
+        db.session.add(dataset)
+        db.session.commit()
+        dataset_id = dataset.id
+
+        dataset.deleted_at = datetime(2026, 1, 1, 12, 0, 0)
+        db.session.commit()
+
+        self.login(ALPHA_USERNAME)
+        rison_query = (
+            
"(filters:!((col:id,opr:dataset_deleted_state,value:only)),page_size:200)"
+        )
+        rv = self.client.get(f"/api/v1/dataset/?q={rison_query}")
+        assert rv.status_code == 200
+        ids = [r["id"] for r in json.loads(rv.data)["result"]]
+        assert dataset_id in ids
+
+        self._hard_delete_created(dataset_id, database)
+
+    def test_deleted_state_list_hides_non_owned_from_read_access_user(self) -> 
None:
+        """A read-access non-owner must not enumerate a dataset once it is
+        soft-deleted.
+
+        Gamma is granted ``datasource_access`` to the dataset, so
+        ``DatasourceFilter`` makes it visible to gamma while live. After
+        soft-delete, the deleted-state list is scoped to the restore audience
+        (owners/admins), so gamma — who could never restore it — must not see 
it
+        via ``include`` or ``only``.
+        """
+        admin = self.get_user(ADMIN_USERNAME)
+        database = Database(database_name="sd_acl_db", 
sqlalchemy_uri="sqlite://")
+        db.session.add(database)
+        db.session.flush()
+        dataset = SqlaTable(table_name="sd_acl_tbl", database=database, 
owners=[admin])
+        db.session.add(dataset)
+        db.session.commit()
+        dataset_id = dataset.id
+
+        gamma_role = security_manager.find_role("Gamma")
+        pvm = security_manager.add_permission_view_menu(
+            "datasource_access", dataset.perm
+        )
+        gamma_role.permissions.append(pvm)
+        db.session.commit()
+
+        try:
+            # Precondition: gamma can see the dataset while it is live.
+            self.login(GAMMA_USERNAME)
+            rv = self.client.get("/api/v1/dataset/?q=(page_size:200)")
+            assert dataset_id in [r["id"] for r in 
json.loads(rv.data)["result"]], (
+                "precondition: gamma should see the live dataset via 
datasource access"
+            )
+
+            reloaded = (
+                db.session.query(SqlaTable)
+                .execution_options(**{SKIP_VISIBILITY_FILTER_CLASSES: 
{SqlaTable}})
+                .filter(SqlaTable.id == dataset_id)
+                .one()
+            )
+            reloaded.deleted_at = datetime(2026, 1, 1, 12, 0, 0)
+            db.session.commit()
+
+            for value in ("include", "only"):
+                rison_query = (
+                    
f"(filters:!((col:id,opr:dataset_deleted_state,value:{value})),"
+                    "page_size:200)"
+                )
+                rv = self.client.get(f"/api/v1/dataset/?q={rison_query}")
+                assert rv.status_code == 200
+                ids = [r["id"] for r in json.loads(rv.data)["result"]]
+                assert dataset_id not in ids, (
+                    "read-access non-owner must not enumerate a soft-deleted "
+                    f"dataset via dataset_deleted_state={value}"
+                )
+        finally:
+            pvm = security_manager.find_permission_view_menu(
+                "datasource_access", dataset.perm
+            )
+            if pvm:
+                security_manager.del_permission_role(gamma_role, pvm)
+            db.session.commit()
+            self._hard_delete_created(dataset_id, database)
+
+    def test_no_cascade_to_dependent_charts(self) -> None:
+        """Soft-deleting a dataset should NOT cascade to its charts (FR-009, 
T018)."""
+        dataset_id = self._get_example_dataset_id()
+        self.login(ADMIN_USERNAME)
+
+        # Find charts that depend on this dataset
+        dependent_charts = (
+            db.session.query(Slice)
+            .filter(Slice.datasource_id == dataset_id, Slice.datasource_type 
== "table")
+            .all()
+        )
+        dependent_chart_ids = [c.id for c in dependent_charts]
+
+        try:
+            # Soft-delete the dataset
+            rv = self.client.delete(f"/api/v1/dataset/{dataset_id}")
+            assert rv.status_code == 200
+
+            # Dependent charts should still be active (no cascade). On this
+            # branch ``Slice`` does not yet carry ``deleted_at`` (added by the
+            # charts soft-delete PR), so we only verify the row is still
+            # loadable through the default visibility-filtered query — which
+            # would return None if the chart had been soft-deleted.
+            for chart_id in dependent_chart_ids:
+                chart = (
+                    db.session.query(Slice).filter(Slice.id == 
chart_id).one_or_none()
+                )
+                assert chart is not None, f"Chart {chart_id} should still be 
active"
+        finally:
+            self._restore_dataset(dataset_id)
+
+
+class TestDatasetRestore(SupersetTestCase):
+    """Tests for dataset restore behaviour (T027)."""
+
+    def _get_example_dataset(self) -> SqlaTable:
+        dataset = db.session.query(SqlaTable).first()
+        assert dataset is not None
+        return dataset
+
+    def test_restore_soft_deleted_dataset(self) -> None:
+        """POST /api/v1/dataset/<uuid>/restore should make it visible again."""
+        dataset = self._get_example_dataset()
+        dataset_id = dataset.id
+        dataset_uuid = str(dataset.uuid)
+        self.login(ADMIN_USERNAME)
+
+        self.client.delete(f"/api/v1/dataset/{dataset_id}")
+
+        rv = self.client.post(f"/api/v1/dataset/{dataset_uuid}/restore")
+        assert rv.status_code == 200
+
+        rv = self.client.get(f"/api/v1/dataset/{dataset_id}")
+        assert rv.status_code == 200
+
+    def test_restore_failure_returns_422(self) -> None:
+        """A failure during restore surfaces as a clean 422 via the
+        ``DatasetRestoreFailedError`` handler rather than an unhandled 500.
+
+        ``RestoreDatasetCommand.run`` wraps the restore in ``@transaction``
+        and rethrows ``DatasetRestoreFailedError`` on any underlying
+        SQLAlchemy error; this pins that the endpoint maps it to 422.
+        """
+        from unittest.mock import patch
+
+        from superset.commands.dataset.exceptions import (
+            DatasetRestoreFailedError,
+        )
+
+        dataset = self._get_example_dataset()
+        dataset_id = dataset.id
+        dataset_uuid = str(dataset.uuid)
+        self.login(ADMIN_USERNAME)
+        self.client.delete(f"/api/v1/dataset/{dataset_id}")
+
+        with patch(
+            "superset.commands.dataset.restore.RestoreDatasetCommand.run",
+            side_effect=DatasetRestoreFailedError(),
+        ):
+            rv = self.client.post(f"/api/v1/dataset/{dataset_uuid}/restore")
+        assert rv.status_code == 422
+
+        # Cleanup: the mocked restore left the example dataset soft-deleted.
+        _restore_dataset(dataset_id)

Review Comment:
   <div>
   
   
   <div id="suggestion">
   <div id="issue"><b>Missing finally block in test cleanup</b></div>
   <div id="fix">
   
   Wrap the dataset restore call and assertion in a `try/finally` block to 
ensure `_restore_dataset(dataset_id)` always runs, mirroring the pattern in 
other tests.
   </div>
   
   
   </div>
   
   
   
   
   <small><i>Code Review Run #0a8946</i></small>
   </div>
   
   ---
   Should Bito avoid suggestions like this for future reviews? (<a 
href=https://alpha.bito.ai/home/ai-agents/review-rules>Manage Rules</a>)
   - [ ] Yes, avoid them



##########
tests/integration_tests/security_tests.py:
##########
@@ -560,6 +560,72 @@ def 
test_after_update_database__perm_datasource_access(self):
         db.session.delete(tmp_db1)
         db.session.commit()
 
+    def test_after_update_database__perm_datasource_access_soft_deleted(self):
+        """A soft-deleted dataset's perm strings must still be rewritten on a
+        database rename. The maintenance query bypasses the soft-delete
+        visibility filter so hidden datasets are updated too; otherwise
+        restoring the dataset later would resurrect stale 
dataset/schema/catalog
+        permission strings pointing at the old database name.
+        """
+        from datetime import datetime  # noqa: PLC0415
+
+        from superset.constants import SKIP_VISIBILITY_FILTER_CLASSES  # noqa: 
PLC0415
+
+        security_manager.on_view_menu_after_update = Mock()

Review Comment:
   <div>
   
   
   <div id="suggestion">
   <div id="issue"><b>Mock setup without verification</b></div>
   <div id="fix">
   
   The test mocks `on_view_menu_after_update` at line 574 but never verifies 
the hook was called. The implementation at `superset/security/manager.py:2056` 
invokes this hook for each updated dataset. Compare with the existing test at 
line 548 which includes `assert_has_calls` verification.
   </div>
   
   
   </div>
   
   
   
   
   <small><i>Code Review Run #0a8946</i></small>
   </div>
   
   ---
   Should Bito avoid suggestions like this for future reviews? (<a 
href=https://alpha.bito.ai/home/ai-agents/review-rules>Manage Rules</a>)
   - [ ] Yes, avoid them



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to