This is an automated email from the ASF dual-hosted git repository. utkarsharma pushed a commit to branch v2-9-test in repository https://gitbox.apache.org/repos/asf/airflow.git
commit 4d9407183c6d2c178b90e4c6cb43918d0d6afa3f Author: htpawel <[email protected]> AuthorDate: Fri Jun 14 16:58:42 2024 +0200 Fix dag task scheduled and queued duration metrics (#37936) (cherry picked from commit bffb7b08c300a79ace933b396a91220af5afa919) --- airflow/models/taskinstance.py | 4 ++-- .../logging-monitoring/metrics.rst | 28 +++++++++++----------- newsfragments/37936.significant.rst | 4 ++++ 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/airflow/models/taskinstance.py b/airflow/models/taskinstance.py index 2fd73ef31f..c36fab35ce 100644 --- a/airflow/models/taskinstance.py +++ b/airflow/models/taskinstance.py @@ -2391,7 +2391,7 @@ class TaskInstance(Base, LoggingMixin): self.task_id, ) return - timing = (timezone.utcnow() - self.queued_dttm).total_seconds() + timing = timezone.utcnow() - self.queued_dttm elif new_state == TaskInstanceState.QUEUED: metric_name = "scheduled_duration" if self.start_date is None: @@ -2404,7 +2404,7 @@ class TaskInstance(Base, LoggingMixin): self.task_id, ) return - timing = (timezone.utcnow() - self.start_date).total_seconds() + timing = timezone.utcnow() - self.start_date else: raise NotImplementedError("no metric emission setup for state %s", new_state) diff --git a/docs/apache-airflow/administration-and-deployment/logging-monitoring/metrics.rst b/docs/apache-airflow/administration-and-deployment/logging-monitoring/metrics.rst index 3a8fc3c19f..8310125fce 100644 --- a/docs/apache-airflow/administration-and-deployment/logging-monitoring/metrics.rst +++ b/docs/apache-airflow/administration-and-deployment/logging-monitoring/metrics.rst @@ -253,21 +253,21 @@ Name Description ================================================================ ======================================================================== ``dagrun.dependency-check.<dag_id>`` Milliseconds taken to check DAG dependencies ``dagrun.dependency-check`` Milliseconds taken to check DAG dependencies. Metric with dag_id tagging. 
-``dag.<dag_id>.<task_id>.duration`` Seconds taken to run a task -``task.duration`` Seconds taken to run a task. Metric with dag_id and task-id tagging. -``dag.<dag_id>.<task_id>.scheduled_duration`` Seconds a task spends in the Scheduled state, before being Queued -``task.scheduled_duration`` Seconds a task spends in the Scheduled state, before being Queued. +``dag.<dag_id>.<task_id>.duration`` Milliseconds taken to run a task +``task.duration`` Milliseconds taken to run a task. Metric with dag_id and task-id tagging. +``dag.<dag_id>.<task_id>.scheduled_duration`` Milliseconds a task spends in the Scheduled state, before being Queued +``task.scheduled_duration`` Milliseconds a task spends in the Scheduled state, before being Queued. Metric with dag_id and task_id tagging. -``dag.<dag_id>.<task_id>.queued_duration`` Seconds a task spends in the Queued state, before being Running -``task.queued_duration`` Seconds a task spends in the Queued state, before being Running. +``dag.<dag_id>.<task_id>.queued_duration`` Milliseconds a task spends in the Queued state, before being Running +``task.queued_duration`` Milliseconds a task spends in the Queued state, before being Running. Metric with dag_id and task_id tagging. -``dag_processing.last_duration.<dag_file>`` Seconds taken to load the given DAG file -``dag_processing.last_duration`` Seconds taken to load the given DAG file. Metric with file_name tagging. -``dagrun.duration.success.<dag_id>`` Seconds taken for a DagRun to reach success state -``dagrun.duration.success`` Seconds taken for a DagRun to reach success state. +``dag_processing.last_duration.<dag_file>`` Milliseconds taken to load the given DAG file +``dag_processing.last_duration`` Milliseconds taken to load the given DAG file. Metric with file_name tagging. +``dagrun.duration.success.<dag_id>`` Milliseconds taken for a DagRun to reach success state +``dagrun.duration.success`` Milliseconds taken for a DagRun to reach success state. 
Metric with dag_id and run_type tagging. -``dagrun.duration.failed.<dag_id>`` Seconds taken for a DagRun to reach failed state -``dagrun.duration.failed`` Seconds taken for a DagRun to reach failed state. +``dagrun.duration.failed.<dag_id>`` Milliseconds taken for a DagRun to reach failed state +``dagrun.duration.failed`` Milliseconds taken for a DagRun to reach failed state. Metric with dag_id and run_type tagging. ``dagrun.schedule_delay.<dag_id>`` Milliseconds of delay between the scheduled DagRun start date and the actual DagRun start date @@ -277,8 +277,8 @@ Name Description only a single scheduler can enter this loop at a time ``scheduler.critical_section_query_duration`` Milliseconds spent running the critical section task instance query ``scheduler.scheduler_loop_duration`` Milliseconds spent running one scheduler loop -``dagrun.<dag_id>.first_task_scheduling_delay`` Seconds elapsed between first task start_date and dagrun expected start -``dagrun.first_task_scheduling_delay`` Seconds elapsed between first task start_date and dagrun expected start. +``dagrun.<dag_id>.first_task_scheduling_delay`` Milliseconds elapsed between first task start_date and dagrun expected start +``dagrun.first_task_scheduling_delay`` Milliseconds elapsed between first task start_date and dagrun expected start. Metric with dag_id and run_type tagging. ``collect_db_dags`` Milliseconds taken for fetching all Serialized Dags from DB ``kubernetes_executor.clear_not_launched_queued_tasks.duration`` Milliseconds taken for clearing not launched queued tasks in Kubernetes Executor diff --git a/newsfragments/37936.significant.rst b/newsfragments/37936.significant.rst new file mode 100644 index 0000000000..5714bd8ae2 --- /dev/null +++ b/newsfragments/37936.significant.rst @@ -0,0 +1,4 @@ +Time unit for ``scheduled_duration`` and ``queued_duration`` changed. + +``scheduled_duration`` and ``queued_duration`` metrics are now emitted in milliseconds instead of seconds. 
+By convention, all statsd metrics should be emitted in milliseconds; this is the unit that downstream consumers such as Prometheus' statsd-exporter expect.
