vincbeck commented on code in PR #61372:
URL: https://github.com/apache/airflow/pull/61372#discussion_r2759575016
##########
airflow-core/src/airflow/models/taskinstance.py:
##########
@@ -181,6 +183,48 @@ def _stop_remaining_tasks(*, task_instance: TaskInstance,
task_teardown_map=None
log.info("Not skipping teardown task '%s'", ti.task_id)
+def _recalculate_dagrun_queued_at_deadlines(
+ dagrun: DagRun, new_queued_at: datetime, session: Session
+) -> None:
+ """
+ Recalculate deadline times for deadlines that reference dagrun.queued_at.
+
+ :param dagrun: The DagRun whose deadlines should be recalculated
+ :param new_queued_at: The new queued_at timestamp to use for calculation
+ :param session: Database session
+
+ :meta private:
+ """
+ deadlines = session.scalars(
+ select(Deadline).where(
+ Deadline.dagrun_id == dagrun.id,
+ Deadline.missed == false(),
+ )
+ ).all()
+
+ if not deadlines:
+ return
+
+ for deadline in deadlines:
+ if deadline.deadline_alert_id:
Review Comment:
Should we filter them out in the query?
##########
airflow-core/src/airflow/models/taskinstance.py:
##########
@@ -181,6 +183,48 @@ def _stop_remaining_tasks(*, task_instance: TaskInstance,
task_teardown_map=None
log.info("Not skipping teardown task '%s'", ti.task_id)
+def _recalculate_dagrun_queued_at_deadlines(
+ dagrun: DagRun, new_queued_at: datetime, session: Session
+) -> None:
+ """
+ Recalculate deadline times for deadlines that reference dagrun.queued_at.
+
+ :param dagrun: The DagRun whose deadlines should be recalculated
+ :param new_queued_at: The new queued_at timestamp to use for calculation
+ :param session: Database session
+
+ :meta private:
+ """
+ deadlines = session.scalars(
+ select(Deadline).where(
+ Deadline.dagrun_id == dagrun.id,
+ Deadline.missed == false(),
+ )
+ ).all()
+
+ if not deadlines:
+ return
+
+ for deadline in deadlines:
+ if deadline.deadline_alert_id:
+ if deadline_alert := session.get(DeadlineAlertModel,
deadline.deadline_alert_id):
Review Comment:
I think we should join these 2 tables in the query instead of making one
query per deadline?
##########
airflow-core/tests/unit/models/test_dagrun.py:
##########
@@ -1397,6 +1398,88 @@ def
test_dagrun_success_handles_empty_deadline_list(self, mock_prune, dag_maker,
mock_prune.assert_not_called()
assert dag_run.state == DagRunState.SUCCESS
+ def test_clear_task_instances_recalculates_dagrun_queued_deadlines(self,
session, dag_maker):
Review Comment:
Should not it be in `test_taskinstance`? You updated `taskinstance.py` so I
expect the test to be in `test_taskinstance`
##########
airflow-core/src/airflow/models/taskinstance.py:
##########
@@ -181,6 +183,48 @@ def _stop_remaining_tasks(*, task_instance: TaskInstance,
task_teardown_map=None
log.info("Not skipping teardown task '%s'", ti.task_id)
+def _recalculate_dagrun_queued_at_deadlines(
+ dagrun: DagRun, new_queued_at: datetime, session: Session
+) -> None:
+ """
+ Recalculate deadline times for deadlines that reference dagrun.queued_at.
+
+ :param dagrun: The DagRun whose deadlines should be recalculated
+ :param new_queued_at: The new queued_at timestamp to use for calculation
+ :param session: Database session
+
+ :meta private:
+ """
+ deadlines = session.scalars(
+ select(Deadline).where(
+ Deadline.dagrun_id == dagrun.id,
+ Deadline.missed == false(),
+ )
+ ).all()
+
+ if not deadlines:
+ return
+
+ for deadline in deadlines:
+ if deadline.deadline_alert_id:
+ if deadline_alert := session.get(DeadlineAlertModel,
deadline.deadline_alert_id):
+ reference_type =
deadline_alert.reference.get(ReferenceModels.REFERENCE_TYPE_FIELD)
+ if reference_type ==
ReferenceModels.DagRunQueuedAtDeadline.__name__:
Review Comment:
Same here, if we do the join, we should filter in the query IMO
##########
airflow-core/src/airflow/models/taskinstance.py:
##########
@@ -181,6 +183,48 @@ def _stop_remaining_tasks(*, task_instance: TaskInstance,
task_teardown_map=None
log.info("Not skipping teardown task '%s'", ti.task_id)
+def _recalculate_dagrun_queued_at_deadlines(
+ dagrun: DagRun, new_queued_at: datetime, session: Session
+) -> None:
+ """
+ Recalculate deadline times for deadlines that reference dagrun.queued_at.
+
+ :param dagrun: The DagRun whose deadlines should be recalculated
+ :param new_queued_at: The new queued_at timestamp to use for calculation
+ :param session: Database session
+
+ :meta private:
+ """
+ deadlines = session.scalars(
+ select(Deadline).where(
+ Deadline.dagrun_id == dagrun.id,
+ Deadline.missed == false(),
+ )
+ ).all()
+
+ if not deadlines:
+ return
+
+ for deadline in deadlines:
+ if deadline.deadline_alert_id:
+ if deadline_alert := session.get(DeadlineAlertModel,
deadline.deadline_alert_id):
+ reference_type =
deadline_alert.reference.get(ReferenceModels.REFERENCE_TYPE_FIELD)
+ if reference_type ==
ReferenceModels.DagRunQueuedAtDeadline.__name__:
+ # We can't use evaluate_with() since the new queued_at is
not written to the DB yet.
+ deadline_interval =
timedelta(seconds=deadline_alert.interval)
+ new_deadline_time = new_queued_at + deadline_interval
+
+ log.info(
+ "Recalculating deadline %s for DagRun %s.%s: old=%s,
new=%s",
+ deadline.id,
+ dagrun.dag_id,
+ dagrun.run_id,
+ deadline.deadline_time,
+ new_deadline_time,
+ )
+ deadline.deadline_time = new_deadline_time
Review Comment:
Should not we commit the new deadline time?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]