This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new e90241a56d Add critical section query duration metric (#27700)
e90241a56d is described below
commit e90241a56d2c5231d5640a753cbdcff04ee20c82
Author: Michael Petro <[email protected]>
AuthorDate: Thu Nov 17 20:21:53 2022 -0500
Add critical section query duration metric (#27700)
---
airflow/jobs/scheduler_job.py | 21 +++++++++++++++------
docs/apache-airflow/logging-monitoring/metrics.rst | 1 +
2 files changed, 16 insertions(+), 6 deletions(-)
diff --git a/airflow/jobs/scheduler_job.py b/airflow/jobs/scheduler_job.py
index eba3c27831..47eba3ed93 100644
--- a/airflow/jobs/scheduler_job.py
+++ b/airflow/jobs/scheduler_job.py
@@ -326,12 +326,21 @@ class SchedulerJob(BaseJob):
query = query.limit(max_tis)
- task_instances_to_examine: list[TI] = with_row_locks(
- query,
- of=TI,
- session=session,
- **skip_locked(session=session),
- ).all()
+ timer = Stats.timer("scheduler.critical_section_query_duration")
+ timer.start()
+
+ try:
+ task_instances_to_examine: list[TI] = with_row_locks(
+ query,
+ of=TI,
+ session=session,
+ **skip_locked(session=session),
+ ).all()
+ timer.stop(send=True)
+ except OperationalError as e:
+ timer.stop(send=False)
+ raise e
+
# TODO[HA]: This was wrong before anyway, as it only looked at a sub-set of dags, not everything.
# Stats.gauge('scheduler.tasks.pending', len(task_instances_to_examine))
diff --git a/docs/apache-airflow/logging-monitoring/metrics.rst b/docs/apache-airflow/logging-monitoring/metrics.rst
index e81e10cab2..e37c5aa785 100644
--- a/docs/apache-airflow/logging-monitoring/metrics.rst
+++ b/docs/apache-airflow/logging-monitoring/metrics.rst
@@ -161,6 +161,7 @@ Name
Description
start date and the actual
DagRun start date
``scheduler.critical_section_duration`` Milliseconds spent in the
critical section of scheduler loop --
only a single scheduler
can enter this loop at a time
+``scheduler.critical_section_query_duration`` Milliseconds spent running the critical section task instance query
``scheduler.scheduler_loop_duration`` Milliseconds spent running
one scheduler loop
``dagrun.<dag_id>.first_task_scheduling_delay`` Seconds elapsed between
first task start_date and dagrun expected start
``collect_db_dags`` Milliseconds taken for
fetching all Serialized Dags from DB