turbaszek commented on a change in pull request #10956:
URL: https://github.com/apache/airflow/pull/10956#discussion_r501238709
##########
File path: airflow/jobs/scheduler_job.py
##########
@@ -899,122 +667,54 @@ def process_file(
except Exception: # pylint: disable=broad-except
self.log.exception("Failed at reloading the DAG file %s",
file_path)
Stats.incr('dag_file_refresh_error', 1, 1)
- return [], 0
+ return 0, 0
if len(dagbag.dags) > 0:
self.log.info("DAG(s) %s retrieved from %s", dagbag.dags.keys(),
file_path)
else:
self.log.warning("No viable dags retrieved from %s", file_path)
self.update_import_errors(session, dagbag)
- return [], len(dagbag.import_errors)
-
- try:
- self.execute_on_failure_callbacks(dagbag,
failure_callback_requests)
- except Exception: # pylint: disable=broad-except
- self.log.exception("Error executing failure callback!")
-
- # Save individual DAGs in the ORM and update
DagModel.last_scheduled_time
- dagbag.sync_to_db()
-
- paused_dag_ids = DagModel.get_paused_dag_ids(dag_ids=dagbag.dag_ids)
-
- unpaused_dags: List[DAG] = [
- dag for dag_id, dag in dagbag.dags.items() if dag_id not in
paused_dag_ids
- ]
-
- serialized_dags = self._prepare_serialized_dags(unpaused_dags,
pickle_dags, session)
+ return 0, len(dagbag.import_errors)
+
+ self.execute_callbacks(dagbag, callback_requests)
+
+ # Save individual DAGs in the ORM
+ dagbag.read_dags_from_db = True
+
+ # Retry 'dagbag.sync_to_db()' in case of any Operational Errors
+ # In case of failures, provide_session handles rollback
+ for attempt in tenacity.Retrying(
+
retry=tenacity.retry_if_exception_type(exception_types=OperationalError),
+ wait=tenacity.wait_random_exponential(multiplier=0.5, max=5),
+ stop=tenacity.stop_after_attempt(settings.MAX_DB_RETRIES),
+ before_sleep=tenacity.before_sleep_log(self.log, logging.DEBUG),
+ reraise=True
+ ):
+ with attempt:
+ self.log.debug(
+ "Running dagbag.sync_to_db with retries. Try %d of %d",
+ attempt.retry_state.attempt_number,
+ settings.MAX_DB_RETRIES
+ )
+ dagbag.sync_to_db()
- dags = self._find_dags_to_process(unpaused_dags)
+ if pickle_dags:
+ paused_dag_ids =
DagModel.get_paused_dag_ids(dag_ids=dagbag.dag_ids)
- ti_keys_to_schedule = self._process_dags(dags, session)
+ unpaused_dags: List[DAG] = [
+ dag for dag_id, dag in dagbag.dags.items() if dag_id not in
paused_dag_ids
+ ]
- self._schedule_task_instances(dagbag, ti_keys_to_schedule, session)
+ for dag in unpaused_dags:
+ dag.pickle(session)
Review comment:
This may be more efficient, since `unpaused_dags` is not used anywhere
other than in this loop, so building the intermediate list is unnecessary.
```python
for dag_id, dag in dagbag.dags.items():
if dag_id not in paused_dag_ids:
dag.pickle(session)
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]