ashb commented on code in PR #64109:
URL: https://github.com/apache/airflow/pull/64109#discussion_r3353864208
##########
airflow-core/tests/unit/jobs/test_scheduler_job.py:
##########
@@ -3772,6 +3775,94 @@ def
test_runs_are_created_after_max_active_runs_was_reached(self, dag_maker, ses
dag_runs = DagRun.find(dag_id=dag.dag_id, session=session)
assert len(dag_runs) == 2
+ def test_runs_are_not_starved_by_max_active_runs_limit(self, dag_maker,
session):
+ """
+ Test that dagruns are not starved by max_active_runs
+ """
+ scheduler_job = Job()
+ self.job_runner = SchedulerJobRunner(job=scheduler_job,
executors=[self.null_exec])
+
+ dag_ids = ["dag1", "dag2", "dag3"]
+
+ max_active_runs = 3
+
+ for dag_id in dag_ids:
+ with dag_maker(
+ dag_id=dag_id,
+ max_active_runs=max_active_runs,
+ session=session,
+ catchup=True,
+ schedule=timedelta(seconds=60),
+ start_date=DEFAULT_DATE,
+ ):
+ # Need to use something that doesn't immediately get marked as
success by the scheduler
+ BashOperator(task_id="task", bash_command="true")
+
+ dag_run = dag_maker.create_dagrun(
+ state=State.QUEUED, session=session,
run_type=DagRunType.SCHEDULED
+ )
+
+ for _ in range(50):
+ # create a bunch of dagruns in queued state, to make sure they
are filtered by max_active_runs
+ dag_run = dag_maker.create_dagrun_after(
+ dag_run, run_type=DagRunType.SCHEDULED, state=State.QUEUED
+ )
+
+ self.job_runner._start_queued_dagruns(session)
+ session.flush()
+
+ running_dagrun_count = session.scalar(
+ select(func.count()).select_from(DagRun).where(DagRun.state ==
DagRunState.RUNNING)
+ )
+
+ assert running_dagrun_count == max_active_runs * len(dag_ids)
+
+ def
test_no_more_dagruns_are_set_to_running_when_max_active_runs_exceeded(self,
dag_maker, session):
Review Comment:
I'm not convinced this test really adequately covers the case you claim it
does.
With max_active_runs=1 and 6 runs already RUNNING, the query's new rn <=
max_active_runs - num_running filter never gets a chance to matter — the
candidates that reach _start_queued_dagruns are rejected by the existing Python
guard at scheduler_job_runner.py:2345 (active_runs >= dag_run.max_active_runs),
which this PR doesn't touch. So running_pre == running_post holds with or
without your query change; i suspect this test would pass on main. To actually
cover the fix, you need a case where the query's per-DAG cap is what limits
candidates: e.g. one DAG at its max_active_runs with non-backfill runs already
running plus a large queued backlog, alongside a second DAG with free capacity,
then assert the second DAG isn't starved out of the 20-candidate budget
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]