ashb commented on a change in pull request #10956:
URL: https://github.com/apache/airflow/pull/10956#discussion_r502470029
##########
File path: airflow/jobs/scheduler_job.py
##########
@@ -899,122 +667,54 @@ def process_file(
except Exception: # pylint: disable=broad-except
self.log.exception("Failed at reloading the DAG file %s",
file_path)
Stats.incr('dag_file_refresh_error', 1, 1)
- return [], 0
+ return 0, 0
if len(dagbag.dags) > 0:
self.log.info("DAG(s) %s retrieved from %s", dagbag.dags.keys(),
file_path)
else:
self.log.warning("No viable dags retrieved from %s", file_path)
self.update_import_errors(session, dagbag)
- return [], len(dagbag.import_errors)
-
- try:
- self.execute_on_failure_callbacks(dagbag,
failure_callback_requests)
- except Exception: # pylint: disable=broad-except
- self.log.exception("Error executing failure callback!")
-
- # Save individual DAGs in the ORM and update
DagModel.last_scheduled_time
- dagbag.sync_to_db()
-
- paused_dag_ids = DagModel.get_paused_dag_ids(dag_ids=dagbag.dag_ids)
-
- unpaused_dags: List[DAG] = [
- dag for dag_id, dag in dagbag.dags.items() if dag_id not in
paused_dag_ids
- ]
-
- serialized_dags = self._prepare_serialized_dags(unpaused_dags,
pickle_dags, session)
+ return 0, len(dagbag.import_errors)
+
+ self.execute_callbacks(dagbag, callback_requests)
+
+ # Save individual DAGs in the ORM
+ dagbag.read_dags_from_db = True
+
+ # Retry 'dagbag.sync_to_db()' in case of any Operational Errors
+ # In case of failures, provide_session handles rollback
+ for attempt in tenacity.Retrying(
+
retry=tenacity.retry_if_exception_type(exception_types=OperationalError),
+ wait=tenacity.wait_random_exponential(multiplier=0.5, max=5),
+ stop=tenacity.stop_after_attempt(settings.MAX_DB_RETRIES),
+ before_sleep=tenacity.before_sleep_log(self.log, logging.DEBUG),
+ reraise=True
+ ):
+ with attempt:
+ self.log.debug(
+ "Running dagbag.sync_to_db with retries. Try %d of %d",
+ attempt.retry_state.attempt_number,
+ settings.MAX_DB_RETRIES
+ )
+ dagbag.sync_to_db()
- dags = self._find_dags_to_process(unpaused_dags)
+ if pickle_dags:
+ paused_dag_ids =
DagModel.get_paused_dag_ids(dag_ids=dagbag.dag_ids)
- ti_keys_to_schedule = self._process_dags(dags, session)
+ unpaused_dags: List[DAG] = [
+ dag for dag_id, dag in dagbag.dags.items() if dag_id not in
paused_dag_ids
+ ]
- self._schedule_task_instances(dagbag, ti_keys_to_schedule, session)
+ for dag in unpaused_dags:
+ dag.pickle(session)
Review comment:
Nice. Changed in 36e23f613
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org