dstandish commented on code in PR #43520:
URL: https://github.com/apache/airflow/pull/43520#discussion_r1823096131


##########
airflow/jobs/scheduler_job_runner.py:
##########
@@ -1822,9 +1822,30 @@ def _fail_tasks_stuck_in_queued(self, session: Session = 
NEW_SESSION) -> None:
                                 ),
                             )
                         )
+                        num_times_stuck = self._get_num_times_stuck(ti, 
session)
+                        if num_times_stuck < conf.getint("core", 
"num_stuck_retries", fallback=3):
+                            self._reset_task_instance(ti, session)
+
+
             except NotImplementedError:
                 self.log.debug("Executor doesn't support cleanup of stuck 
queued tasks. Skipping.")
 
+    @provide_session
+    def _get_num_times_stuck(self, ti: TaskInstance, session: Session = 
NEW_SESSION) -> int:
+        return len(session.scalars(select(Log).where(Log.dag_id == ti.task_id)
+                        .where(Log.dag_id == ti.dag_id)
+                        .where(Log.run_id == ti.run_id)
+                        .where(Log.map_index == ti.map_index)
+                        .where(Log.try_number == ti.try_number)
+                        .where(Log.event == "stuck in queued")
+                        ))
+
+    def _reset_task_instance(self, ti: TaskInstance, session: Session = 
NEW_SESSION):
+        ti.external_executor_id = None
+        ti.state = State.SCHEDULED
+        session.add(ti)

Review Comment:
   Use `session.merge(ti)` here rather than `session.add(ti)`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to