This is an automated email from the ASF dual-hosted git repository.

rnhttr pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 6647610a8e Don't Fail LocalTaskJob on heartbeat (#41704)
6647610a8e is described below

commit 6647610a8e8e3de4d2bfb701e16d1c7b42edd3f8
Author: Collin McNulty <[email protected]>
AuthorDate: Mon Aug 26 10:07:01 2024 -0500

    Don't Fail LocalTaskJob on heartbeat (#41704)
    
    * Never fail an ltj over a heartbeat
    
    * Log a warning on failed heartbeat
    
    * Avoid using f-string in log
    
    * Remove unnecessary pass statement
---
 airflow/jobs/local_task_job_runner.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/airflow/jobs/local_task_job_runner.py b/airflow/jobs/local_task_job_runner.py
index 48eb547a19..95a471f239 100644
--- a/airflow/jobs/local_task_job_runner.py
+++ b/airflow/jobs/local_task_job_runner.py
@@ -208,9 +208,14 @@ class LocalTaskJobRunner(BaseJobRunner, LoggingMixin):
 
                     if span.is_recording():
                         span.add_event(name="perform_heartbeat")
-                    perform_heartbeat(
-                        job=self.job, heartbeat_callback=self.heartbeat_callback, only_if_necessary=False
-                    )
+                    try:
+                        perform_heartbeat(
+                            job=self.job, heartbeat_callback=self.heartbeat_callback, only_if_necessary=False
+                        )
+                    except Exception as e:
+                        # Failing the heartbeat should never kill the localtaskjob
+                        # If it repeatedly can't heartbeat, it will be marked as a zombie anyhow
+                        self.log.warning("Heartbeat failed with Exception: %s", e)
 
                     # If it's been too long since we've heartbeat, then it's possible that
                     # the scheduler rescheduled this task, so kill launched processes.

Reply via email to