uranusjr commented on code in PR #54103:
URL: https://github.com/apache/airflow/pull/54103#discussion_r2555020477
##########
airflow-core/tests/unit/jobs/test_scheduler_job.py:
##########
@@ -1178,6 +1224,97 @@ def test_find_executable_task_instances_executor_with_teams(self, dag_maker, moc
]
assert len(b_tis_in_wrong_executor) == 0
+ def task_helper(self, dag_maker, session, dag_id: str, task_num: int):
+ dag_tasks = {}
+
+ with dag_maker(dag_id=dag_id):
+ for i in range(task_num):
+ # Assign priority weight to certain tasks.
+ if (i % 10) == 0: # 10, 20, 30, 40, 50, ...
+ weight = int(i / 2)
+ dag_tasks[f"op{i}"] = EmptyOperator(task_id=f"dummy{i}", priority_weight=weight)
+ else:
+ # No executor specified, runs on default executor
+ dag_tasks[f"op{i}"] = EmptyOperator(task_id=f"dummy{i}")
+
+ dag_run = dag_maker.create_dagrun(run_type=DagRunType.SCHEDULED)
+
+ task_tis = {}
+
+ tis_list = []
+ for i in range(task_num):
+ task_tis[f"ti{i}"] = dag_run.get_task_instance(dag_tasks[f"op{i}"].task_id, session)
+ # add
+ tis_list.append(task_tis[f"ti{i}"])
+
+ for ti in tis_list:
+ ti.state = State.SCHEDULED
+ ti.dag_model.max_active_tasks = 4
+
+ session.flush()
+
+ return tis_list
+
+ @conf_vars(
+ {
+ ("scheduler", "max_tis_per_query"): "100",
+ ("scheduler", "max_dagruns_to_create_per_loop"): "10",
+ ("scheduler", "max_dagruns_per_loop_to_schedule"): "20",
+ ("core", "parallelism"): "100",
+ ("core", "max_active_tasks_per_dag"): "4",
+ ("core", "max_active_runs_per_dag"): "10",
+ ("core", "default_pool_task_slot_count"): "64",
+ }
+ )
+ def test_per_dr_limit_applied_in_task_query(self, dag_maker, mock_executors):
+ scheduler_job = Job()
+ scheduler_job.executor.parallelism = 100
+ scheduler_job.executor.slots_available = 70
+ scheduler_job.max_tis_per_query = 100
+ self.job_runner = SchedulerJobRunner(job=scheduler_job)
+ session = settings.Session()
+
+ # Use the same run_id.
+ task_maker(dag_maker, session, "dag_1300_tasks", 1300, 4, "run1")
+ task_maker(dag_maker, session, "dag_1200_tasks", 1200, 4, "run1")
+ task_maker(dag_maker, session, "dag_1100_tasks", 1100, 4, "run1")
+ task_maker(dag_maker, session, "dag_100_tasks", 100, 4, "run1")
+ task_maker(dag_maker, session, "dag_90_tasks", 90, 4, "run1")
+ task_maker(dag_maker, session, "dag_80_tasks", 80, 4, "run1")
+
+ count = 0
+ iterations = 0
+
+ from airflow.configuration import conf
+
+ task_num = conf.getint("core", "max_active_tasks_per_dag") * 6
+
+ # 6 dags * 4 = 24.
+ assert task_num == 24
+
+ queued_tis = None
+ while count < task_num:
+ # Use `_executable_task_instances_to_queued` because it returns a list of TIs
+ # while `_critical_section_enqueue_task_instances` just returns the number of the TIs.
+ queued_tis = self.job_runner._executable_task_instances_to_queued(
+ max_tis=scheduler_job.executor.slots_available, session=session
+ )
+ count += len(queued_tis)
+ iterations += 1
+
+ assert iterations == 1
+ assert count == task_num
+
+ assert queued_tis is not None
+
+ dag_counts = Counter(ti.dag_id for ti in queued_tis)
+
+ # Tasks from all 6 dags should have been queued.
+ assert len(dag_counts) == 6
+ assert all(count == 4 for count in dag_counts.values()), (
+ "Count for each dag_id should be 4 but it isn't"
+ )
Review Comment:
It’d probably be easier for debugging if this just did
```python
assert dag_counts == {
"dag_1300_tasks": 4,
"dag_1200_tasks": 4,
...
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]