This is an automated email from the ASF dual-hosted git repository.

uranusjr pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new a6299d47ea Allow timetable to slightly miss catchup cutoff (#33404)
a6299d47ea is described below

commit a6299d47eac9cba23d54e5b1f3ff589e76871eae
Author: Tzu-ping Chung <uranu...@gmail.com>
AuthorDate: Fri Aug 18 14:22:28 2023 +0800

    Allow timetable to slightly miss catchup cutoff (#33404)
---
 airflow/timetables/trigger.py              |  2 +-
 newsfragments/33404.significant.rst        | 16 ++++++++++++++++
 tests/timetables/test_trigger_timetable.py | 18 +++++++++++++++---
 3 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/airflow/timetables/trigger.py b/airflow/timetables/trigger.py
index 7807542da5..e5c5b7c153 100644
--- a/airflow/timetables/trigger.py
+++ b/airflow/timetables/trigger.py
@@ -89,7 +89,7 @@ class CronTriggerTimetable(CronMixin, Timetable):
             else:
                 next_start_time = self._align_to_next(restriction.earliest)
         else:
-            start_time_candidates = [self._align_to_next(DateTime.utcnow())]
+            start_time_candidates = [self._align_to_prev(DateTime.utcnow())]
             if last_automated_data_interval is not None:
                 start_time_candidates.append(self._get_next(last_automated_data_interval.end))
             if restriction.earliest is not None:
diff --git a/newsfragments/33404.significant.rst b/newsfragments/33404.significant.rst
new file mode 100644
index 0000000000..e0f14168cb
--- /dev/null
+++ b/newsfragments/33404.significant.rst
@@ -0,0 +1,16 @@
+CronTriggerTimetable is now less aggressive when trying to skip a run
+
+When setting ``catchup=False``, CronTriggerTimetable no longer skips a run if
+the scheduler does not query the timetable immediately after the previous run
+has been triggered.
+
+This should not affect scheduling in most cases, but can change the behaviour if
+a DAG is paused-unpaused to manually skip a run. Previously, the timetable (with
+``catchup=False``) would only start a run after a DAG is unpaused, but with this
+change, the scheduler would try to look a little bit back to schedule the
+previous run that covers a part of the period when the DAG was paused. This
+means you will need to keep a DAG paused longer (namely, for the entire cron
+period to pass) to really skip a run.
+
+Note that this is also the behaviour exhibited by various other cron-based
+scheduling tools, such as anacron.
diff --git a/tests/timetables/test_trigger_timetable.py b/tests/timetables/test_trigger_timetable.py
index 6f1d44479f..58e1f49df9 100644
--- a/tests/timetables/test_trigger_timetable.py
+++ b/tests/timetables/test_trigger_timetable.py
@@ -48,11 +48,11 @@ DELTA_FROM_MIDNIGHT = datetime.timedelta(minutes=30, hours=16)
     [
         pytest.param(
             None,
-            CURRENT_TIME + DELTA_FROM_MIDNIGHT,
+            YESTERDAY + DELTA_FROM_MIDNIGHT,
             id="first-run",
         ),
         pytest.param(
-            PREV_DATA_INTERVAL_EXACT,
+            DataInterval.exact(YESTERDAY + DELTA_FROM_MIDNIGHT),
             CURRENT_TIME + DELTA_FROM_MIDNIGHT,
             id="before-now",
         ),
@@ -89,9 +89,21 @@ def test_daily_cron_trigger_no_catchup_first_starts_at_next_schedule(
         pytest.param(
             pendulum.DateTime(2022, 7, 27, 0, 30, 0, tzinfo=TIMEZONE),
             START_DATE,
-            DagRunInfo.exact(pendulum.DateTime(2022, 7, 27, 1, 0, 0, tzinfo=TIMEZONE)),
+            DagRunInfo.exact(pendulum.DateTime(2022, 7, 27, 0, 0, 0, tzinfo=TIMEZONE)),
             id="current_time_not_on_boundary",
         ),
+        pytest.param(
+            pendulum.DateTime(2022, 7, 27, 1, 0, 0, tzinfo=TIMEZONE),
+            START_DATE,
+            DagRunInfo.exact(pendulum.DateTime(2022, 7, 27, 1, 0, 0, tzinfo=TIMEZONE)),
+            id="current_time_miss_one_interval_on_boundary",
+        ),
+        pytest.param(
+            pendulum.DateTime(2022, 7, 27, 1, 30, 0, tzinfo=TIMEZONE),
+            START_DATE,
+            DagRunInfo.exact(pendulum.DateTime(2022, 7, 27, 1, 0, 0, tzinfo=TIMEZONE)),
+            id="current_time_miss_one_interval_not_on_boundary",
+        ),
         pytest.param(
             pendulum.DateTime(2022, 7, 27, 0, 30, 0, tzinfo=TIMEZONE),
             pendulum.DateTime(2199, 12, 31, 22, 30, 0, tzinfo=TIMEZONE),

Reply via email to