codeant-ai-for-open-source[bot] commented on code in PR #40130:
URL: https://github.com/apache/superset/pull/40130#discussion_r3350036747
##########
tests/unit_tests/datasets/commands/importers/v1/import_test.py:
##########
@@ -1014,6 +1015,349 @@ def test_import_dataset_access_check(
import_dataset(config)
+def test_import_soft_deleted_dataset_overwrite_restores_in_place(
+ mocker: MockerFixture,
+ session: Session,
+) -> None:
+ """
+ Overwrite-importing a soft-deleted dataset must restore the row in
+ place rather than hard-delete-and-replace. A hard delete would
+ cascade through the chart back-reference and table_columns /
+ sql_metrics rows; in-place restore preserves them.
+ """
+ mocker.patch.object(security_manager, "can_access", return_value=True)
+
+ engine = db.session.get_bind()
+ SqlaTable.metadata.create_all(engine) # pylint: disable=no-member
+
+ database = Database(database_name="my_database",
sqlalchemy_uri="sqlite://")
+ db.session.add(database)
+ db.session.flush()
+
+ config = copy.deepcopy(dataset_fixture)
+ config["database_id"] = database.id
+
+ initial = import_dataset(config)
+ original_id = initial.id
+
+ existing = db.session.query(SqlaTable).filter_by(uuid=config["uuid"]).one()
+ existing.deleted_at = datetime(2026, 1, 1, 12, 0, 0)
+ db.session.flush()
+
+ admin = User(
+ first_name="Alice",
+ last_name="Doe",
+ email="[email protected]",
+ username="admin",
+ roles=[Role(name="Admin")],
+ )
+
+ with override_user(admin):
+ restored = import_dataset(config, overwrite=True)
+
+ assert restored.id == original_id
+ assert restored.deleted_at is None
+
+
+def test_import_soft_deleted_dataset_non_overwrite_restores_for_owner(
+ mocker: MockerFixture,
+ session: Session,
+) -> None:
+ """
+ Non-overwrite re-import of a soft-deleted UUID is implicitly a
+ restore-and-update: the user is bringing the dataset back by
+ uploading it again. The same ownership rule as the overwrite path
+ applies, so an owner (or admin) succeeds without setting
+ overwrite=True.
+ """
+ mocker.patch.object(security_manager, "can_access", return_value=True)
+
+ engine = db.session.get_bind()
+ SqlaTable.metadata.create_all(engine) # pylint: disable=no-member
+
+ database = Database(database_name="my_database",
sqlalchemy_uri="sqlite://")
+ db.session.add(database)
+ db.session.flush()
+
+ config = copy.deepcopy(dataset_fixture)
+ config["database_id"] = database.id
+
+ initial = import_dataset(config)
+ original_id = initial.id
+
+ existing = db.session.query(SqlaTable).filter_by(uuid=config["uuid"]).one()
+ existing.deleted_at = datetime(2026, 1, 1, 12, 0, 0)
+ db.session.flush()
+
+ admin = User(
+ first_name="Alice",
+ last_name="Doe",
+ email="[email protected]",
+ username="admin",
+ roles=[Role(name="Admin")],
+ )
+
+ with override_user(admin):
+ restored = import_dataset(config, overwrite=False)
+
+ assert restored.id == original_id
+ assert restored.deleted_at is None
+
+
+def test_import_soft_deleted_dataset_non_overwrite_raises_for_non_owner(
+ mocker: MockerFixture,
+ session: Session,
+) -> None:
+ """
+ Non-overwrite re-import that would resurrect a soft-deleted dataset
+ must respect ownership: a non-owner without admin role cannot
+ restore-via-import. Mirrors the explicit /restore endpoint's check.
+ """
+ mocker.patch.object(security_manager, "can_access", return_value=True)
+
+ engine = db.session.get_bind()
+ SqlaTable.metadata.create_all(engine) # pylint: disable=no-member
+
+ database = Database(database_name="my_database",
sqlalchemy_uri="sqlite://")
+ db.session.add(database)
+ db.session.flush()
+
+ config = copy.deepcopy(dataset_fixture)
+ config["database_id"] = database.id
+
+ import_dataset(config)
+ existing = db.session.query(SqlaTable).filter_by(uuid=config["uuid"]).one()
+ existing.deleted_at = datetime(2026, 1, 1, 12, 0, 0)
+ db.session.flush()
+
+ non_owner = User(
+ first_name="Bob",
+ last_name="Roe",
+ email="[email protected]",
+ username="bob",
+ roles=[Role(name="Gamma")],
+ )
+
+ with override_user(non_owner):
+ with pytest.raises(ImportFailedError) as excinfo:
+ import_dataset(config, overwrite=False)
+ assert "permissions to restore" in str(excinfo.value)
+ # Verify the permission check fired before any mutation: if a regression
+ # cleared ``deleted_at`` before raising, this would silently produce a
+ # half-restored row and the test would still pass on the message check
+ # alone.
+ db.session.refresh(existing)
+ assert existing.deleted_at is not None, (
+ "deleted_at was cleared before the exception — restore mutation "
+ "happened before the ownership check"
+ )
+
+
+def test_import_soft_deleted_dataset_raises_when_caller_lacks_can_write(
+ mocker: MockerFixture,
+ session: Session,
+) -> None:
+ """
+ Case B: re-import of a soft-deleted UUID by a caller without
+ can_write must raise, not silently return the soft-deleted row.
+
+ Real-world scenario: a user has can_write Dashboard but not
+ can_write Dataset, and they import a dashboard zip that references
+ a soft-deleted dataset. Silently returning the row would let the
+ dashboard importer wire the dashboard's charts to a deleted dataset
+ and produce broken chart loads.
+ """
+ mocker.patch.object(security_manager, "can_access", return_value=False)
+
+ engine = db.session.get_bind()
+ SqlaTable.metadata.create_all(engine) # pylint: disable=no-member
+
+ database = Database(database_name="my_database",
sqlalchemy_uri="sqlite://")
+ db.session.add(database)
+ db.session.flush()
+
+ config = copy.deepcopy(dataset_fixture)
+ config["database_id"] = database.id
+
+ # Seed a soft-deleted dataset with the matching UUID directly, so the
+ # test doesn't need to flip permissions mid-test.
+ existing = SqlaTable(
+ table_name="soft_deleted_dataset",
+ database_id=database.id,
+ uuid=config["uuid"],
+ deleted_at=datetime(2026, 1, 1, 12, 0, 0),
+ )
+ db.session.add(existing)
+ db.session.flush()
+
+ with pytest.raises(ImportFailedError) as excinfo:
+ import_dataset(config, overwrite=False)
+ assert "can_write" in str(excinfo.value)
+ # Case B contract: deleted_at must remain set after the exception. A
+ # regression that clears deleted_at before the can_write check would
+ # leave the row in a half-restored state and silently pass the message
+ # assertion above.
+ db.session.refresh(existing)
+ assert existing.deleted_at is not None, (
+ "Case B: deleted_at was cleared before raising — mutation happened "
+ "before the can_write check"
+ )
+
+
+def test_import_soft_deleted_dataset_restore_removes_orphan_children(
+ mocker: MockerFixture,
+ session: Session,
+) -> None:
+ """
+ Restoring a soft-deleted dataset via re-import (non-overwrite,
+ Option C) syncs columns and metrics — children present in the live
+ row but absent from the uploaded config are removed, not silently
+ merged.
+
+ Without forcing sync on the implicit-restore path, ``sync=[]``
+ would mean "upsert by UUID, leave non-matching children alone",
+ so the restored dataset would carry stale columns from before the
+ soft-delete. That's a surprising merge of two states; treating
+ re-import as a clean replacement is what an explicit ``overwrite``
+ would do anyway.
+ """
+ mocker.patch.object(security_manager, "can_access", return_value=True)
+
+ engine = db.session.get_bind()
+ SqlaTable.metadata.create_all(engine) # pylint: disable=no-member
+
+ database = Database(database_name="my_database",
sqlalchemy_uri="sqlite://")
+ db.session.add(database)
+ db.session.flush()
+
+ config = copy.deepcopy(dataset_fixture)
+ config["database_id"] = database.id
+
+ initial = import_dataset(config)
+ original_id = initial.id
+
+ existing = db.session.query(SqlaTable).filter_by(uuid=config["uuid"]).one()
+ # Add an orphan column that the upload doesn't know about.
+ orphan = TableColumn(
+ column_name="orphan_col",
+ type="STRING",
+ table=existing,
+ )
+ db.session.add(orphan)
+ existing.deleted_at = datetime(2026, 1, 1, 12, 0, 0)
+ db.session.flush()
+ orphan_uuid = orphan.uuid
+
+ admin = User(
+ first_name="Alice",
+ last_name="Doe",
+ email="[email protected]",
+ username="admin",
+ roles=[Role(name="Admin")],
+ )
+
+ with override_user(admin):
+ restored = import_dataset(config, overwrite=False)
+
+ assert restored.id == original_id
+ assert restored.deleted_at is None
+ assert orphan_uuid not in {c.uuid for c in restored.columns}, (
+ "orphan column survived restore-via-import; the implicit-restore "
+ "path must force sync so re-import is a clean replacement"
+ )
+
+
+def test_import_dataset_multiple_results_fallback_finds_soft_deleted(
+ mocker: MockerFixture,
+ session: Session,
+) -> None:
+ """
+ The MultipleResultsFound exception handler inside import_dataset does
+ a second lookup by uuid via ``.one()``. That fallback bypasses the
+ soft-delete visibility filter so a soft-deleted duplicate is still
+ returnable — without the bypass the listener would hide it and the
+ fallback's ``.one()`` would raise ``NoResultFound``, masking the
+ original ``MultipleResultsFound`` with a misleading error.
+
+ Reproduce: monkey-patch ``SqlaTable.import_from_dict`` to raise
+ ``MultipleResultsFound`` and seed two rows with the same uuid (one
+ live, one soft-deleted). The fallback's ``.one()`` would normally
+ return ambiguous, so we narrow with the bypass-aware filter.
+ """
+ from sqlalchemy.exc import MultipleResultsFound
+
+ mocker.patch.object(security_manager, "can_access", return_value=True)
+ mocker.patch.object(
+ SqlaTable,
+ "import_from_dict",
+ side_effect=MultipleResultsFound("simulated duplicate"),
+ )
+
+ engine = db.session.get_bind()
+ SqlaTable.metadata.create_all(engine) # pylint: disable=no-member
+
+ database = Database(database_name="my_database",
sqlalchemy_uri="sqlite://")
+ db.session.add(database)
+ db.session.flush()
+
+ config = copy.deepcopy(dataset_fixture)
+ config["database_id"] = database.id
+
+ # Seed a soft-deleted row with the target uuid directly. Without the
+ # bypass on the fallback query, the listener would hide this row and
+ # .one() would raise NoResultFound.
+ soft_deleted = SqlaTable(
+ table_name="ambiguous_dataset",
+ database_id=database.id,
+ uuid=config["uuid"],
+ deleted_at=datetime(2026, 1, 1, 12, 0, 0),
+ )
+ db.session.add(soft_deleted)
+ db.session.flush()
+
+ # Should return the row (the only one matching the uuid), not raise
+ # NoResultFound. The importer clears ``deleted_at`` on the soft-deleted
+ # match before ``import_from_dict`` is called, so the row returned by
+ # the fallback no longer reports as soft-deleted — what we verify here
+ # is that the bypass found the row at all rather than masking the
+ # original ``MultipleResultsFound`` with a misleading error.
+ result = import_dataset(config)
+ assert result.uuid == config["uuid"]
Review Comment:
**Suggestion:** This assertion path is incompatible with the current
importer contract for soft-deleted matches when `SqlaTable.import_from_dict`
raises `MultipleResultsFound`: `import_dataset` now rolls back the restore and
raises `ImportFailedError` instead of returning a dataset. The test should
assert the expected exception (or use a non-soft-deleted setup) to avoid a
deterministic failure. [api mismatch]
<details>
<summary><b>Severity Level:</b> Critical 🚨</summary>
```mdx
- ❌ Unit test for soft-deleted MultipleResultsFound path always fails.
- ⚠️ Importer's new soft-delete contract not correctly asserted by tests.
```
</details>
<details>
<summary><b>Steps of Reproduction ✅ </b></summary>
```mdx
1. In
`tests/unit_tests/datasets/commands/importers/v1/import_test.py:281-337`, test
`test_import_dataset_multiple_results_fallback_finds_soft_deleted` patches
`SqlaTable.import_from_dict` to raise `MultipleResultsFound` and seeds a
soft-deleted
`SqlaTable` row with `uuid=config["uuid"]`.
2. The test then calls `result = import_dataset(config)` at line 1324
expecting a
successful return and asserts `result.uuid == config["uuid"]` at line 1325.
3. In the importer implementation at
`superset/commands/dataset/importers/v1/utils.py:10-103`, `import_dataset()`
calls
`find_existing_for_import(SqlaTable, config["uuid"])` (line 61), which
returns the
soft-deleted row while bypassing the visibility filter
(`find_existing_for_import` at
lines 405-426).
4. Because `existing.deleted_at` is not `None`, `import_dataset()` takes the
soft-deleted
mutation path (lines 103-135), clears `deleted_at`, and sets
`is_soft_deleted_match =
True`; when the patched `SqlaTable.import_from_dict` raises
`MultipleResultsFound`, the
`except MultipleResultsFound` block at lines 178-203 detects
`is_soft_deleted_match` and,
by design, restores `existing.deleted_at` and raises `ImportFailedError`
instead of
returning a dataset, so the assertion `result.uuid == config["uuid"]` at
line 1325 is
never reached and the test deterministically fails.
```
</details>
[Fix in
Cursor](https://app.codeant.ai/fix-in-ide?tool=cursor&prompt_id=e10f38ce53a44b169f573366f7cc211b&service=github&base_url=https%3A%2F%2Fgithub.com&org=apache&repo=apache%2Fsuperset)
| [Fix in VSCode
Claude](https://app.codeant.ai/fix-in-ide?tool=vscode-claude&prompt_id=e10f38ce53a44b169f573366f7cc211b&service=github&base_url=https%3A%2F%2Fgithub.com&org=apache&repo=apache%2Fsuperset)
*(Use Cmd/Ctrl + Click for best experience)*
<details>
<summary><b>Prompt for AI Agent 🤖 </b></summary>
```mdx
This is a comment left during a code review.
**Path:** tests/unit_tests/datasets/commands/importers/v1/import_test.py
**Line:** 1324:1325
**Comment:**
*Api Mismatch: This assertion path is incompatible with the current
importer contract for soft-deleted matches when `SqlaTable.import_from_dict`
raises `MultipleResultsFound`: `import_dataset` now rolls back the restore and
raises `ImportFailedError` instead of returning a dataset. The test should
assert the expected exception (or use a non-soft-deleted setup) to avoid a
deterministic failure.
Validate the correctness of the flagged issue. If correct, How can I resolve
this? If you propose a fix, implement it and please make it concise.
Once fix is implemented, also check other comments on the same PR, and ask
user if the user wants to fix the rest of the comments as well. if said yes,
then fetch all the comments validate the correctness and implement a minimal fix
```
</details>
<a
href='https://app.codeant.ai/feedback?pr_url=https%3A%2F%2Fgithub.com%2Fapache%2Fsuperset%2Fpull%2F40130&comment_hash=078cba8ff4c422ab7dd9b7f97c4b539be264626956442c32e51968489f4ee47c&reaction=like'>👍</a>
| <a
href='https://app.codeant.ai/feedback?pr_url=https%3A%2F%2Fgithub.com%2Fapache%2Fsuperset%2Fpull%2F40130&comment_hash=078cba8ff4c422ab7dd9b7f97c4b539be264626956442c32e51968489f4ee47c&reaction=dislike'>👎</a>
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]