Dev-iL commented on code in PR #64968:
URL: https://github.com/apache/airflow/pull/64968#discussion_r3060522276
##########
airflow-core/tests/unit/models/test_taskinstance.py:
##########
@@ -2605,6 +2605,61 @@ def
test_task_instance_history_is_created_when_ti_goes_for_retry(self, dag_maker
# the new try_id should be different from what's recorded in tih
assert tih[0].task_instance_id == try_id
+ @pytest.mark.parametrize(
+ ("first_ti", "second_ti"),
+ [
+ pytest.param(
+ ("dag_1", "run_1", "task_1", -1),
+ ("dag_2", "run_1", "task_1", -1),
+ id="tasks_with_different_dags",
+ ),
+ pytest.param(
+ ("dag_1", "run_1", "task_1", -1),
+ ("dag_1", "run_2", "task_1", -1),
+ id="tasks_with_different_runs",
+ ),
+ # There are no cases with equal dag_id/run_id because we expect
different DagRuns
+ ],
+ )
+ def test_instance_dag_and_run_id_uniqueness(self, create_task_instance,
session, first_ti, second_ti):
+ dag_id_1, run_id_1, task_id_1, map_index_1 = first_ti
+ dag_id_2, run_id_2, task_id_2, map_index_2 = second_ti
+
+ ti1 = create_task_instance(
+ dag_id=dag_id_1,
+ run_id=run_id_1,
+ task_id=task_id_1,
+ map_index=map_index_1,
+ session=session,
+ )
+ ti2 = create_task_instance(
+ dag_id=dag_id_2,
+ run_id=run_id_2,
+ task_id=task_id_2,
+ map_index=map_index_2,
+ session=session,
+ )
+
+ found_1 = TI.get_task_instance(
+ dag_id=dag_id_1,
+ run_id=run_id_1,
+ task_id=task_id_1,
+ map_index=map_index_1,
+ session=session,
+ )
+ found_2 = TI.get_task_instance(
+ dag_id=dag_id_2,
+ run_id=run_id_2,
+ task_id=task_id_2,
+ map_index=map_index_2,
Review Comment:
**Suggestion (non-blocking):** Consider adding an explicit ambiguity setup
assertion to make the test self-documenting about *why* `dag_id` matters. For
example, between creating the TIs and calling `get_task_instance`:
```python
# Verify the ambiguity that caused the original bug (issue #64957)
if dag_id_1 != dag_id_2:
ambiguous_count = session.scalar(
select(func.count()).select_from(TI).filter_by(
run_id=run_id_1, task_id=task_id_1, map_index=map_index_1
)
)
assert ambiguous_count == 2, "Setup: two TIs must match without dag_id
filter"
```
This makes the test self-documenting: a reader immediately sees that without
`dag_id` filtering, the query is ambiguous — which is the root cause of #64957.
Also, the `tasks_with_different_runs` case tests already-working behavior
(`run_id` was in `filter_by` pre-fix). It's fine as defense-in-depth, but a
brief comment noting this would help future readers understand it's not testing
the regression itself.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]