Re: [PR] Fix clearTaskInstances API: Restore include_past/future support on UI [airflow]

via GitHub Wed, 20 Aug 2025 02:08:25 -0700


pierrejeambrun commented on code in PR #54416:
URL: https://github.com/apache/airflow/pull/54416#discussion_r2287478997



##########
airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py:
##########
@@ -759,12 +758,12 @@ def test_offset_limit(self, test_client, 
one_task_with_many_mapped_tis):
             ({"order_by": "map_index", "limit": 100}, list(range(100))),
             ({"order_by": "-map_index", "limit": 100}, list(range(109, 9, 
-1))),
             (
-                {"order_by": "state", "limit": 108},
-                list(range(5, 25)) + list(range(25, 110)) + list(range(3)),
+                    {"order_by": "state", "limit": 108},
+                    list(range(5, 25)) + list(range(25, 110)) + list(range(3)),

Review Comment:
   Formatting diff that should be removed. You can resolve that by intalling 
pre commit hooks locally. You can take a look at the contributing documentation 
for that:
   
https://github.com/apache/airflow/blob/main/contributing-docs/08_static_code_checks.rst



##########
airflow-core/src/airflow/api_fastapi/core_api/routes/public/task_instances.py:
##########
@@ -673,13 +674,19 @@ def post_clear_task_instances(
             raise HTTPException(status.HTTP_404_NOT_FOUND, error_message)
         # Get the specific dag version:
         dag = get_dag_for_run(dag_bag, dag_run, session)
-        if past or future:
+        if (past or future) and dag_run.logical_date is None:
             raise HTTPException(
                 status.HTTP_400_BAD_REQUEST,
-                "Cannot use include_past or include_future when dag_run_id is 
provided because logical_date is not applicable.",
+                "Cannot use include_past or include_future with a manually 
triggered DAG run (logical_date is None)."
             )
-        body.start_date = dag_run.logical_date if dag_run.logical_date is not 
None else None
-        body.end_date = dag_run.logical_date if dag_run.logical_date is not 
None else None
+
+        if past or future:
+            body.start_date = dag_run.logical_date if not past else None
+            body.end_date = dag_run.logical_date if not future else None
+            dag_run_id = None # Use date-based clearing
+        else:
+            body.start_date = dag_run.logical_date
+            body.end_date = dag_run.logical_date

Review Comment:
   I don't think this logic is correct, `past` and `future` are handled bellow 
when defined by setting start_date and/or end_date to none already, I don't 
think you need to handle that here.



##########
airflow-core/src/airflow/api_fastapi/core_api/routes/public/task_instances.py:
##########
@@ -739,6 +746,7 @@ def post_clear_task_instances(
     )
 
 
+

Review Comment:
   ```suggestion
   ```



##########
airflow-core/src/airflow/api_fastapi/core_api/routes/public/task_instances.py:
##########
@@ -664,6 +664,7 @@ def post_clear_task_instances(
     downstream = body.include_downstream
     upstream = body.include_upstream
 
+    # Improved logic - resolve logical_date for scheduled DAGRun

Review Comment:
   ```suggestion
   ```



##########
airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py:
##########
@@ -2312,10 +2311,103 @@ def 
test_dag_run_with_future_or_past_flag_returns_400(self, test_client, session
         response = test_client.post(f"/dags/{dag_id}/clearTaskInstances", 
json=payload)
         assert response.status_code == 400
         assert (
-            "Cannot use include_past or include_future when dag_run_id is 
provided"
+            "Cannot use include_past or include_future with a manually 
triggered DAG run (logical_date is None)."
             in response.json()["detail"]
         )
 
+    @pytest.mark.parametrize(
+        "flag, expected",
+        [
+            ("include_past", 2),  # D0 ~ D1
+            ("include_future", 2),  # D1 ~

Review Comment:
   ```suggestion
               ("include_future", 2),  # D1 ~ D2
   ```
   
   Maybe change `DX` to `TX`, T1, T2, T3, I don't understand why the Task 
abervation ends up being a `D` 



##########
airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py:
##########
@@ -2312,10 +2311,103 @@ def 
test_dag_run_with_future_or_past_flag_returns_400(self, test_client, session
         response = test_client.post(f"/dags/{dag_id}/clearTaskInstances", 
json=payload)
         assert response.status_code == 400
         assert (
-            "Cannot use include_past or include_future when dag_run_id is 
provided"
+            "Cannot use include_past or include_future with a manually 
triggered DAG run (logical_date is None)."
             in response.json()["detail"]
         )
 
+    @pytest.mark.parametrize(
+        "flag, expected",
+        [
+            ("include_past", 2),  # D0 ~ D1
+            ("include_future", 2),  # D1 ~
+        ],
+    )
+    def test_with_dag_run_id_and_past_future_converts_to_date_range(self, 
test_client, session, flag,
+                                                                    expected):
+        dag_id = "example_python_operator"
+        task_instances = [
+            {"logical_date": DEFAULT_DATETIME_1, "state": State.FAILED},  # D0
+            {"logical_date": DEFAULT_DATETIME_1 + dt.timedelta(days=1), 
"state": State.FAILED},  # D1
+            {"logical_date": DEFAULT_DATETIME_1 + dt.timedelta(days=2), 
"state": State.FAILED},  # D2
+        ]
+        self.create_task_instances(
+            session, dag_id=dag_id, task_instances=task_instances, 
update_extras=False
+        )
+        payload = {
+            "dry_run": True,
+            "only_failed": True,
+            "dag_run_id": "TEST_DAG_RUN_ID_1",
+            flag: True,
+        }
+        resp = test_client.post(f"/dags/{dag_id}/clearTaskInstances", 
json=payload)
+        assert resp.status_code == 200
+        assert resp.json()["total_entries"] == expected  # include_past => 
D0,D1 / include_future => D1,D2
+
+    def test_with_dag_run_id_and_both_past_and_future_means_full_range(self, 
test_client, session):
+        dag_id = "example_python_operator"
+        task_instances = [
+            {"logical_date": DEFAULT_DATETIME_1 - dt.timedelta(days=1), 
"state": State.FAILED},  # D0
+            {"logical_date": DEFAULT_DATETIME_1, "state": State.FAILED},  #D1
+            {"logical_date": DEFAULT_DATETIME_1 + dt.timedelta(days=1), 
"state": State.FAILED},  # D2
+            {"logical_date": DEFAULT_DATETIME_1 + dt.timedelta(days=2), 
"state": State.FAILED},  # D3
+            {"logical_date": None, "state": State.FAILED},  # D4
+        ]
+        self.create_task_instances(
+            session, dag_id=dag_id, task_instances=task_instances, 
update_extras=False
+        )
+        payload = {
+            "dry_run": True,
+            "only_failed": False,
+            "dag_run_id": "TEST_DAG_RUN_ID_1",  # D1
+            "include_past": True,
+            "include_future": True,
+        }
+        resp = test_client.post(f"/dags/{dag_id}/clearTaskInstances", 
json=payload)
+        assert resp.status_code == 200
+        assert resp.json()["total_entries"] == 5 #D0 ~ #D4
+
+    def test_with_dag_run_id_only_uses_run_id_based_clearing(self, 
test_client, session):
+        dag_id = "example_python_operator"
+        task_instances = [
+            {"logical_date": DEFAULT_DATETIME_1, "state": State.SUCCESS},  # D0
+            {"logical_date": DEFAULT_DATETIME_1 + dt.timedelta(days=1), 
"state": State.FAILED},  # D1
+            {"logical_date": DEFAULT_DATETIME_1 + dt.timedelta(days=2), 
"state": State.SUCCESS},  # D2
+        ]
+        self.create_task_instances(
+            session, dag_id=dag_id, task_instances=task_instances, 
update_extras=False
+        )
+        payload = {
+            "dry_run": True,
+            "only_failed": True,
+            "dag_run_id": "TEST_DAG_RUN_ID_1",
+        }
+        resp = test_client.post(f"/dags/{dag_id}/clearTaskInstances", 
json=payload)
+        assert resp.status_code == 200
+        a = resp.json()
+        assert resp.json()["total_entries"] == 1
+        assert resp.json()["task_instances"][0]["logical_date"] == 
(DEFAULT_DATETIME_1 + dt.timedelta(days=1)).strftime("%Y-%m-%dT%H:%M:%SZ") #D1
+
+    @pytest.mark.parametrize("flag", ["include_future", "include_past"])
+    def test_manual_run_with_none_logical_date_returns_400_kept(self, 
test_client, session, flag):

Review Comment:
   How is this test different from 
`test_manual_run_with_none_logical_date_returns_400` 



##########
airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py:
##########
@@ -2312,10 +2311,103 @@ def 
test_dag_run_with_future_or_past_flag_returns_400(self, test_client, session
         response = test_client.post(f"/dags/{dag_id}/clearTaskInstances", 
json=payload)
         assert response.status_code == 400
         assert (
-            "Cannot use include_past or include_future when dag_run_id is 
provided"
+            "Cannot use include_past or include_future with a manually 
triggered DAG run (logical_date is None)."
             in response.json()["detail"]
         )
 
+    @pytest.mark.parametrize(
+        "flag, expected",
+        [
+            ("include_past", 2),  # D0 ~ D1
+            ("include_future", 2),  # D1 ~
+        ],
+    )
+    def test_with_dag_run_id_and_past_future_converts_to_date_range(self, 
test_client, session, flag,
+                                                                    expected):
+        dag_id = "example_python_operator"
+        task_instances = [
+            {"logical_date": DEFAULT_DATETIME_1, "state": State.FAILED},  # D0
+            {"logical_date": DEFAULT_DATETIME_1 + dt.timedelta(days=1), 
"state": State.FAILED},  # D1
+            {"logical_date": DEFAULT_DATETIME_1 + dt.timedelta(days=2), 
"state": State.FAILED},  # D2
+        ]
+        self.create_task_instances(
+            session, dag_id=dag_id, task_instances=task_instances, 
update_extras=False
+        )
+        payload = {
+            "dry_run": True,
+            "only_failed": True,
+            "dag_run_id": "TEST_DAG_RUN_ID_1",
+            flag: True,
+        }
+        resp = test_client.post(f"/dags/{dag_id}/clearTaskInstances", 
json=payload)
+        assert resp.status_code == 200
+        assert resp.json()["total_entries"] == expected  # include_past => 
D0,D1 / include_future => D1,D2
+
+    def test_with_dag_run_id_and_both_past_and_future_means_full_range(self, 
test_client, session):
+        dag_id = "example_python_operator"
+        task_instances = [
+            {"logical_date": DEFAULT_DATETIME_1 - dt.timedelta(days=1), 
"state": State.FAILED},  # D0
+            {"logical_date": DEFAULT_DATETIME_1, "state": State.FAILED},  #D1
+            {"logical_date": DEFAULT_DATETIME_1 + dt.timedelta(days=1), 
"state": State.FAILED},  # D2
+            {"logical_date": DEFAULT_DATETIME_1 + dt.timedelta(days=2), 
"state": State.FAILED},  # D3
+            {"logical_date": None, "state": State.FAILED},  # D4
+        ]
+        self.create_task_instances(
+            session, dag_id=dag_id, task_instances=task_instances, 
update_extras=False
+        )
+        payload = {
+            "dry_run": True,
+            "only_failed": False,
+            "dag_run_id": "TEST_DAG_RUN_ID_1",  # D1
+            "include_past": True,
+            "include_future": True,
+        }
+        resp = test_client.post(f"/dags/{dag_id}/clearTaskInstances", 
json=payload)
+        assert resp.status_code == 200
+        assert resp.json()["total_entries"] == 5 #D0 ~ #D4
+
+    def test_with_dag_run_id_only_uses_run_id_based_clearing(self, 
test_client, session):
+        dag_id = "example_python_operator"
+        task_instances = [
+            {"logical_date": DEFAULT_DATETIME_1, "state": State.SUCCESS},  # D0
+            {"logical_date": DEFAULT_DATETIME_1 + dt.timedelta(days=1), 
"state": State.FAILED},  # D1
+            {"logical_date": DEFAULT_DATETIME_1 + dt.timedelta(days=2), 
"state": State.SUCCESS},  # D2
+        ]
+        self.create_task_instances(
+            session, dag_id=dag_id, task_instances=task_instances, 
update_extras=False
+        )
+        payload = {
+            "dry_run": True,
+            "only_failed": True,
+            "dag_run_id": "TEST_DAG_RUN_ID_1",
+        }
+        resp = test_client.post(f"/dags/{dag_id}/clearTaskInstances", 
json=payload)
+        assert resp.status_code == 200
+        a = resp.json()
+        assert resp.json()["total_entries"] == 1
+        assert resp.json()["task_instances"][0]["logical_date"] == 
(DEFAULT_DATETIME_1 + dt.timedelta(days=1)).strftime("%Y-%m-%dT%H:%M:%SZ") #D1
+

Review Comment:
   Here for the right hand side of the operator, you can hardcode the expected 
string value if that makes it easier.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] Fix clearTaskInstances API: Restore include_past/future support on UI [airflow]

Reply via email to