ashb commented on code in PR #64109:
URL: https://github.com/apache/airflow/pull/64109#discussion_r3353864208


##########
airflow-core/tests/unit/jobs/test_scheduler_job.py:
##########
@@ -3772,6 +3775,94 @@ def 
test_runs_are_created_after_max_active_runs_was_reached(self, dag_maker, ses
         dag_runs = DagRun.find(dag_id=dag.dag_id, session=session)
         assert len(dag_runs) == 2
 
+    def test_runs_are_not_starved_by_max_active_runs_limit(self, dag_maker, 
session):
+        """
+        Test that dagruns are not starved by max_active_runs
+        """
+        scheduler_job = Job()
+        self.job_runner = SchedulerJobRunner(job=scheduler_job, 
executors=[self.null_exec])
+
+        dag_ids = ["dag1", "dag2", "dag3"]
+
+        max_active_runs = 3
+
+        for dag_id in dag_ids:
+            with dag_maker(
+                dag_id=dag_id,
+                max_active_runs=max_active_runs,
+                session=session,
+                catchup=True,
+                schedule=timedelta(seconds=60),
+                start_date=DEFAULT_DATE,
+            ):
+                # Need to use something that doesn't immediately get marked as 
success by the scheduler
+                BashOperator(task_id="task", bash_command="true")
+
+            dag_run = dag_maker.create_dagrun(
+                state=State.QUEUED, session=session, 
run_type=DagRunType.SCHEDULED
+            )
+
+            for _ in range(50):
+                # create a bunch of dagruns in queued state, to make sure they 
are filtered by max_active_runs
+                dag_run = dag_maker.create_dagrun_after(
+                    dag_run, run_type=DagRunType.SCHEDULED, state=State.QUEUED
+                )
+
+        self.job_runner._start_queued_dagruns(session)
+        session.flush()
+
+        running_dagrun_count = session.scalar(
+            select(func.count()).select_from(DagRun).where(DagRun.state == 
DagRunState.RUNNING)
+        )
+
+        assert running_dagrun_count == max_active_runs * len(dag_ids)
+
+    def 
test_no_more_dagruns_are_set_to_running_when_max_active_runs_exceeded(self, 
dag_maker, session):

Review Comment:
   I'm not convinced this test really adequately covers the case you claim it 
does. 
   
   With max_active_runs=1 and 6 runs already RUNNING, the query's new rn <= 
max_active_runs - num_running filter never gets a chance to matter — the 
candidates that reach _start_queued_dagruns are rejected by the existing Python 
guard at scheduler_job_runner.py:2345 (active_runs >= dag_run.max_active_runs), 
which this PR doesn't touch. So running_pre == running_post holds with or 
without your query change; i suspect this test would pass on main. To actually 
cover the fix, you need a case where the query's per-DAG cap is what limits 
candidates: e.g. one DAG at its max_active_runs with non-backfill runs already 
running plus a large queued backlog, alongside a second DAG with free capacity, 
then assert the second DAG isn't starved out of the 20-candidate budget



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to