This is an automated email from the ASF dual-hosted git repository. uranusjr pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push: new a6299d47ea Allow timetable to slightly miss catchup cutoff (#33404) a6299d47ea is described below commit a6299d47eac9cba23d54e5b1f3ff589e76871eae Author: Tzu-ping Chung <uranu...@gmail.com> AuthorDate: Fri Aug 18 14:22:28 2023 +0800 Allow timetable to slightly miss catchup cutoff (#33404) --- airflow/timetables/trigger.py | 2 +- newsfragments/33404.significant.rst | 16 ++++++++++++++++ tests/timetables/test_trigger_timetable.py | 18 +++++++++++++++--- 3 files changed, 32 insertions(+), 4 deletions(-) diff --git a/airflow/timetables/trigger.py b/airflow/timetables/trigger.py index 7807542da5..e5c5b7c153 100644 --- a/airflow/timetables/trigger.py +++ b/airflow/timetables/trigger.py @@ -89,7 +89,7 @@ class CronTriggerTimetable(CronMixin, Timetable): else: next_start_time = self._align_to_next(restriction.earliest) else: - start_time_candidates = [self._align_to_next(DateTime.utcnow())] + start_time_candidates = [self._align_to_prev(DateTime.utcnow())] if last_automated_data_interval is not None: start_time_candidates.append(self._get_next(last_automated_data_interval.end)) if restriction.earliest is not None: diff --git a/newsfragments/33404.significant.rst b/newsfragments/33404.significant.rst new file mode 100644 index 0000000000..e0f14168cb --- /dev/null +++ b/newsfragments/33404.significant.rst @@ -0,0 +1,16 @@ +CronTriggerTimetable is now less aggressive when trying to skip a run + +When setting ``catchup=False``, CronTriggerTimetable no longer skips a run if +the scheduler does not query the timetable immediately after the previous run +has been triggered. + +This should not affect scheduling in most cases, but can change the behaviour if +a DAG is paused-unpaused to manually skip a run. 
Previously, the timetable (with +``catchup=False``) would only start a run after a DAG is unpaused, but with this +change, the scheduler would try to look a little bit back to schedule the +previous run that covers a part of the period when the DAG was paused. This +means you will need to keep a DAG paused longer (namely, for the entire cron +period to pass) to really skip a run. + +Note that this is also the behaviour exhibited by various other cron-based +scheduling tools, such as anacron. diff --git a/tests/timetables/test_trigger_timetable.py b/tests/timetables/test_trigger_timetable.py index 6f1d44479f..58e1f49df9 100644 --- a/tests/timetables/test_trigger_timetable.py +++ b/tests/timetables/test_trigger_timetable.py @@ -48,11 +48,11 @@ DELTA_FROM_MIDNIGHT = datetime.timedelta(minutes=30, hours=16) [ pytest.param( None, - CURRENT_TIME + DELTA_FROM_MIDNIGHT, + YESTERDAY + DELTA_FROM_MIDNIGHT, id="first-run", ), pytest.param( - PREV_DATA_INTERVAL_EXACT, + DataInterval.exact(YESTERDAY + DELTA_FROM_MIDNIGHT), CURRENT_TIME + DELTA_FROM_MIDNIGHT, id="before-now", ), @@ -89,9 +89,21 @@ def test_daily_cron_trigger_no_catchup_first_starts_at_next_schedule( pytest.param( pendulum.DateTime(2022, 7, 27, 0, 30, 0, tzinfo=TIMEZONE), START_DATE, - DagRunInfo.exact(pendulum.DateTime(2022, 7, 27, 1, 0, 0, tzinfo=TIMEZONE)), + DagRunInfo.exact(pendulum.DateTime(2022, 7, 27, 0, 0, 0, tzinfo=TIMEZONE)), id="current_time_not_on_boundary", ), + pytest.param( + pendulum.DateTime(2022, 7, 27, 1, 0, 0, tzinfo=TIMEZONE), + START_DATE, + DagRunInfo.exact(pendulum.DateTime(2022, 7, 27, 1, 0, 0, tzinfo=TIMEZONE)), + id="current_time_miss_one_interval_on_boundary", + ), + pytest.param( + pendulum.DateTime(2022, 7, 27, 1, 30, 0, tzinfo=TIMEZONE), + START_DATE, + DagRunInfo.exact(pendulum.DateTime(2022, 7, 27, 1, 0, 0, tzinfo=TIMEZONE)), + id="current_time_miss_one_interval_not_on_boundary", + ), pytest.param( pendulum.DateTime(2022, 7, 27, 0, 30, 0, tzinfo=TIMEZONE), 
pendulum.DateTime(2199, 12, 31, 22, 30, 0, tzinfo=TIMEZONE),