This is an automated email from the ASF dual-hosted git repository.
kaxilnaik pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 4e10c25 Rename ``processor_poll_interval`` to
``scheduler_idle_sleep_time`` (#18704)
4e10c25 is described below
commit 4e10c25484ade62975953ed42cd9cc03ae45979d
Author: Kaxil Naik <[email protected]>
AuthorDate: Tue Oct 5 21:24:59 2021 +0530
Rename ``processor_poll_interval`` to ``scheduler_idle_sleep_time`` (#18704)
`[scheduler] processor_poll_interval` setting in `airflow.cfg` has been
renamed to `[scheduler] scheduler_idle_sleep_time`
for better understanding.
---
UPDATING.md | 21 +++++++++++++++++++++
airflow/config_templates/config.yml | 7 +++----
airflow/config_templates/default_airflow.cfg | 5 ++---
airflow/configuration.py | 1 +
airflow/jobs/scheduler_job.py | 21 ++++++++++++++++-----
docs/apache-airflow/best-practices.rst | 2 +-
docs/apache-airflow/concepts/scheduler.rst | 2 +-
7 files changed, 45 insertions(+), 14 deletions(-)
diff --git a/UPDATING.md b/UPDATING.md
index 1c291d3..047a920 100644
--- a/UPDATING.md
+++ b/UPDATING.md
@@ -177,6 +177,27 @@ with DAG(dag_id="task_concurrency_example"):
BashOperator(task_id="t1", max_active_tis_per_dag=2, bash_command="echo
Hi")
```
+### `processor_poll_interval` config has been renamed to
`scheduler_idle_sleep_time`
+
+`[scheduler] processor_poll_interval` setting in `airflow.cfg` has been
renamed to `[scheduler] scheduler_idle_sleep_time`
+for better understanding.
+
+It controls the 'time to sleep' at the end of the Scheduler loop if nothing
was scheduled inside `SchedulerJob`.
+
+**Before**:
+
+```ini
+[scheduler]
+processor_poll_interval = 16
+```
+
+**Now**:
+
+```ini
+[scheduler]
+scheduler_idle_sleep_time = 16
+```
+
### Marking success/failed automatically clears failed downstream tasks
When marking a task success/failed in Graph View, its downstream tasks that
are in failed/upstream_failed state are automatically cleared.
diff --git a/airflow/config_templates/config.yml
b/airflow/config_templates/config.yml
index 4c7cfa8..56a36ef 100644
--- a/airflow/config_templates/config.yml
+++ b/airflow/config_templates/config.yml
@@ -1739,13 +1739,12 @@
type: string
example: ~
default: "-1"
- - name: processor_poll_interval
+ - name: scheduler_idle_sleep_time
description: |
Controls how long the scheduler will sleep between loops, but only if there
was nothing to do
in the loop. i.e. if it scheduled something then it will start the
next loop
- iteration straight away. This parameter is badly named (historical
reasons) and it will be
- renamed in the future with deprecation of the current name.
- version_added: 1.10.6
+ iteration straight away.
+ version_added: 2.2.0
type: string
example: ~
default: "1"
diff --git a/airflow/config_templates/default_airflow.cfg
b/airflow/config_templates/default_airflow.cfg
index 464d5f1..54315ca 100644
--- a/airflow/config_templates/default_airflow.cfg
+++ b/airflow/config_templates/default_airflow.cfg
@@ -870,9 +870,8 @@ num_runs = -1
# Controls how long the scheduler will sleep between loops, but only if there was
nothing to do
# in the loop. i.e. if it scheduled something then it will start the next loop
-# iteration straight away. This parameter is badly named (historical reasons)
and it will be
-# renamed in the future with deprecation of the current name.
-processor_poll_interval = 1
+# iteration straight away.
+scheduler_idle_sleep_time = 1
# Number of seconds after which a DAG file is parsed. The DAG file is parsed
every
# ``min_file_process_interval`` number of seconds. Updates to DAGs are
reflected after
diff --git a/airflow/configuration.py b/airflow/configuration.py
index 2e3e536..e120b26 100644
--- a/airflow/configuration.py
+++ b/airflow/configuration.py
@@ -167,6 +167,7 @@ class AirflowConfigParser(ConfigParser):
('metrics', 'statsd_datadog_tags'): ('scheduler',
'statsd_datadog_tags', '2.0.0'),
('metrics', 'statsd_custom_client_path'): ('scheduler',
'statsd_custom_client_path', '2.0.0'),
('scheduler', 'parsing_processes'): ('scheduler', 'max_threads',
'1.10.14'),
+ ('scheduler', 'scheduler_idle_sleep_time'): ('scheduler',
'processor_poll_interval', '2.2.0'),
('operators', 'default_queue'): ('celery', 'default_queue', '2.1.0'),
('core', 'hide_sensitive_var_conn_fields'): ('admin',
'hide_sensitive_variable_fields', '2.1.0'),
('core', 'sensitive_var_conn_names'): ('admin',
'sensitive_variable_fields', '2.1.0'),
diff --git a/airflow/jobs/scheduler_job.py b/airflow/jobs/scheduler_job.py
index a92040b..d0c016c 100644
--- a/airflow/jobs/scheduler_job.py
+++ b/airflow/jobs/scheduler_job.py
@@ -24,6 +24,7 @@ import os
import signal
import sys
import time
+import warnings
from collections import defaultdict
from datetime import timedelta
from typing import Collection, DefaultDict, Dict, List, Optional, Tuple
@@ -86,9 +87,9 @@ class SchedulerJob(BaseJob):
:param num_times_parse_dags: The number of times to try to parse each DAG
file.
-1 for unlimited times.
:type num_times_parse_dags: int
- :param processor_poll_interval: The number of seconds to wait between
+ :param scheduler_idle_sleep_time: The number of seconds to wait between
polls of running processors
- :type processor_poll_interval: int
+ :type scheduler_idle_sleep_time: int
:param do_pickle: once a DAG object is obtained by executing the Python
file, whether to serialize the DAG object to the DB
:type do_pickle: bool
@@ -104,9 +105,10 @@ class SchedulerJob(BaseJob):
subdir: str = settings.DAGS_FOLDER,
num_runs: int = conf.getint('scheduler', 'num_runs'),
num_times_parse_dags: int = -1,
- processor_poll_interval: float = conf.getfloat('scheduler',
'processor_poll_interval'),
+ scheduler_idle_sleep_time: float = conf.getfloat('scheduler',
'scheduler_idle_sleep_time'),
do_pickle: bool = False,
log: logging.Logger = None,
+ processor_poll_interval: Optional[float] = None,
*args,
**kwargs,
):
@@ -117,7 +119,16 @@ class SchedulerJob(BaseJob):
# number of times. This is only to support testing, and isn't
something a user is likely to want to
# configure -- they'll want num_runs
self.num_times_parse_dags = num_times_parse_dags
- self._processor_poll_interval = processor_poll_interval
+ if processor_poll_interval:
+ # TODO: Remove in Airflow 3.0
+ warnings.warn(
+ "The 'processor_poll_interval' parameter is deprecated. "
+ "Please use 'scheduler_idle_sleep_time'.",
+ DeprecationWarning,
+ stacklevel=2,
+ )
+ scheduler_idle_sleep_time = processor_poll_interval
+ self._scheduler_idle_sleep_time = scheduler_idle_sleep_time
self.do_pickle = do_pickle
super().__init__(*args, **kwargs)
@@ -676,7 +687,7 @@ class SchedulerJob(BaseJob):
# If the scheduler is doing things, don't sleep. This means
when there is work to do, the
# scheduler will run "as quick as possible", but when it's
stopped, it can sleep, dropping CPU
# usage when "idle"
- time.sleep(min(self._processor_poll_interval, next_event))
+ time.sleep(min(self._scheduler_idle_sleep_time, next_event))
if loop_count >= self.num_runs > 0:
self.log.info(
diff --git a/docs/apache-airflow/best-practices.rst
b/docs/apache-airflow/best-practices.rst
index 514b77b..4e69109 100644
--- a/docs/apache-airflow/best-practices.rst
+++ b/docs/apache-airflow/best-practices.rst
@@ -314,7 +314,7 @@ In case you see long delays between updating it and the
time it is ready to be t
at the following configuration parameters and fine tune them according to your
needs (see details of
each parameter by following the links):
-* :ref:`config:scheduler__processor_poll_interval`
+* :ref:`config:scheduler__scheduler_idle_sleep_time`
* :ref:`config:scheduler__min_file_process_interval`
* :ref:`config:scheduler__dag_dir_list_interval`
* :ref:`config:scheduler__parsing_processes`
diff --git a/docs/apache-airflow/concepts/scheduler.rst
b/docs/apache-airflow/concepts/scheduler.rst
index 7361a4c..fda92c9 100644
--- a/docs/apache-airflow/concepts/scheduler.rst
+++ b/docs/apache-airflow/concepts/scheduler.rst
@@ -366,7 +366,7 @@ However you can also look at other non-performance-related
scheduler configurati
The scheduler can run multiple processes in parallel to parse DAG files.
This defines
how many processes will run.
-- :ref:`config:scheduler__processor_poll_interval`
+- :ref:`config:scheduler__scheduler_idle_sleep_time`
Controls how long the scheduler will sleep between loops, but only if there was
nothing to do
in the loop. i.e. if it scheduled something then it will start the next loop
iteration straight away. This parameter is badly named (historical reasons)
and it will be