turbaszek commented on a change in pull request #10956:
URL: https://github.com/apache/airflow/pull/10956#discussion_r501238709



##########
File path: airflow/jobs/scheduler_job.py
##########
@@ -899,122 +667,54 @@ def process_file(
         except Exception:  # pylint: disable=broad-except
             self.log.exception("Failed at reloading the DAG file %s", 
file_path)
             Stats.incr('dag_file_refresh_error', 1, 1)
-            return [], 0
+            return 0, 0
 
         if len(dagbag.dags) > 0:
             self.log.info("DAG(s) %s retrieved from %s", dagbag.dags.keys(), 
file_path)
         else:
             self.log.warning("No viable dags retrieved from %s", file_path)
             self.update_import_errors(session, dagbag)
-            return [], len(dagbag.import_errors)
-
-        try:
-            self.execute_on_failure_callbacks(dagbag, 
failure_callback_requests)
-        except Exception:  # pylint: disable=broad-except
-            self.log.exception("Error executing failure callback!")
-
-        # Save individual DAGs in the ORM and update 
DagModel.last_scheduled_time
-        dagbag.sync_to_db()
-
-        paused_dag_ids = DagModel.get_paused_dag_ids(dag_ids=dagbag.dag_ids)
-
-        unpaused_dags: List[DAG] = [
-            dag for dag_id, dag in dagbag.dags.items() if dag_id not in 
paused_dag_ids
-        ]
-
-        serialized_dags = self._prepare_serialized_dags(unpaused_dags, 
pickle_dags, session)
+            return 0, len(dagbag.import_errors)
+
+        self.execute_callbacks(dagbag, callback_requests)
+
+        # Save individual DAGs in the ORM
+        dagbag.read_dags_from_db = True
+
+        # Retry 'dagbag.sync_to_db()' in case of any Operational Errors
+        # In case of failures, provide_session handles rollback
+        for attempt in tenacity.Retrying(
+            
retry=tenacity.retry_if_exception_type(exception_types=OperationalError),
+            wait=tenacity.wait_random_exponential(multiplier=0.5, max=5),
+            stop=tenacity.stop_after_attempt(settings.MAX_DB_RETRIES),
+            before_sleep=tenacity.before_sleep_log(self.log, logging.DEBUG),
+            reraise=True
+        ):
+            with attempt:
+                self.log.debug(
+                    "Running dagbag.sync_to_db with retries. Try %d of %d",
+                    attempt.retry_state.attempt_number,
+                    settings.MAX_DB_RETRIES
+                )
+                dagbag.sync_to_db()
 
-        dags = self._find_dags_to_process(unpaused_dags)
+        if pickle_dags:
+            paused_dag_ids = 
DagModel.get_paused_dag_ids(dag_ids=dagbag.dag_ids)
 
-        ti_keys_to_schedule = self._process_dags(dags, session)
+            unpaused_dags: List[DAG] = [
+                dag for dag_id, dag in dagbag.dags.items() if dag_id not in 
paused_dag_ids
+            ]
 
-        self._schedule_task_instances(dagbag, ti_keys_to_schedule, session)
+            for dag in unpaused_dags:
+                dag.pickle(session)

Review comment:
       This may be more efficient, since `unpaused_dags` is not used anywhere 
other than in this loop:
   ```python
   for dag_id, dag in dagbag.dags.items():
       if dag_id not in paused_dag_ids:
           dag.pickle(session)
   ```
   




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to