[ https://issues.apache.org/jira/browse/AIRFLOW-5581?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Kiran updated AIRFLOW-5581: --------------------------- Description: If run_duration or num_runs is enabled, KubernetesJobWatcher dies prematurely, causing the error below: [2019-10-01 19:40:35,323] {kubernetes_executor.py:327} ERROR - Unknown error in KubernetesJobWatcher. Failing Traceback (most recent call last): File "/home/vccorp/.local/lib/python3.6/site-packages/airflow/contrib/executors/kubernetes_executor.py", line 325, in run self.worker_uuid, self.kube_config) File "/home/vccorp/.local/lib/python3.6/site-packages/airflow/contrib/executors/kubernetes_executor.py", line 359, in _run task.metadata.resource_version File "/home/vccorp/.local/lib/python3.6/site-packages/airflow/contrib/executors/kubernetes_executor.py", line 391, in process_status self.watcher_queue.put((pod_id, None, labels, resource_version)) File "<string>", line 2, in put File "/usr/lib64/python3.6/multiprocessing/managers.py", line 756, in _callmethod conn.send((self._id, methodname, args, kwds)) File "/usr/lib64/python3.6/multiprocessing/connection.py", line 206, in send self._send_bytes(_ForkingPickler.dumps(obj)) File "/usr/lib64/python3.6/multiprocessing/connection.py", line 404, in _send_bytes self._send(header + buf) File "/usr/lib64/python3.6/multiprocessing/connection.py", line 368, in _send n = write(self._handle, buf) BrokenPipeError: [Errno 32] Broken pipe Process KubernetesJobWatcher-3: Traceback (most recent call last): File "/usr/lib64/python3.6/multiprocessing/process.py", line 258, in _bootstrap self.run() File "/home/vccorp/.local/lib/python3.6/site-packages/airflow/contrib/executors/kubernetes_executor.py", line 325, in run self.worker_uuid, self.kube_config) File "/home/vccorp/.local/lib/python3.6/site-packages/airflow/contrib/executors/kubernetes_executor.py", line 359, in _run task.metadata.resource_version File "/home/vccorp/.local/lib/python3.6/site-packages/airflow/contrib/executors/kubernetes_executor.py", line 391, in process_status self.watcher_queue.put((pod_id, 
None, labels, resource_version)) File "<string>", line 2, in put File "/usr/lib64/python3.6/multiprocessing/managers.py", line 756, in _callmethod conn.send((self._id, methodname, args, kwds)) File "/usr/lib64/python3.6/multiprocessing/connection.py", line 206, in send self._send_bytes(_ForkingPickler.dumps(obj)) File "/usr/lib64/python3.6/multiprocessing/connection.py", line 404, in _send_bytes self._send(header + buf) File "/usr/lib64/python3.6/multiprocessing/connection.py", line 368, in _send n = write(self._handle, buf) BrokenPipeError: [Errno 32] Broken pipe was: If run_duration or num_runs is enabled, [2019-10-01 19:40:35,323] \{kubernetes_executor.py:327} ERROR - Unknown error in KubernetesJobWatcher. Failing Traceback (most recent call last): File "/home/vccorp/.local/lib/python3.6/site-packages/airflow/contrib/executors/kubernetes_executor.py", line 325, in run self.worker_uuid, self.kube_config) File "/home/vccorp/.local/lib/python3.6/site-packages/airflow/contrib/executors/kubernetes_executor.py", line 359, in _run task.metadata.resource_version File "/home/vccorp/.local/lib/python3.6/site-packages/airflow/contrib/executors/kubernetes_executor.py", line 391, in process_status self.watcher_queue.put((pod_id, None, labels, resource_version)) File "<string>", line 2, in put File "/usr/lib64/python3.6/multiprocessing/managers.py", line 756, in _callmethod conn.send((self._id, methodname, args, kwds)) File "/usr/lib64/python3.6/multiprocessing/connection.py", line 206, in send self._send_bytes(_ForkingPickler.dumps(obj)) File "/usr/lib64/python3.6/multiprocessing/connection.py", line 404, in _send_bytes self._send(header + buf) File "/usr/lib64/python3.6/multiprocessing/connection.py", line 368, in _send n = write(self._handle, buf) BrokenPipeError: [Errno 32] Broken pipe Process KubernetesJobWatcher-3: Traceback (most recent call last): File "/usr/lib64/python3.6/multiprocessing/process.py", line 258, in _bootstrap self.run() File 
"/home/vccorp/.local/lib/python3.6/site-packages/airflow/contrib/executors/kubernetes_executor.py", line 325, in run self.worker_uuid, self.kube_config) File "/home/vccorp/.local/lib/python3.6/site-packages/airflow/contrib/executors/kubernetes_executor.py", line 359, in _run task.metadata.resource_version File "/home/vccorp/.local/lib/python3.6/site-packages/airflow/contrib/executors/kubernetes_executor.py", line 391, in process_status self.watcher_queue.put((pod_id, None, labels, resource_version)) File "<string>", line 2, in put File "/usr/lib64/python3.6/multiprocessing/managers.py", line 756, in _callmethod conn.send((self._id, methodname, args, kwds)) File "/usr/lib64/python3.6/multiprocessing/connection.py", line 206, in send self._send_bytes(_ForkingPickler.dumps(obj)) File "/usr/lib64/python3.6/multiprocessing/connection.py", line 404, in _send_bytes self._send(header + buf) File "/usr/lib64/python3.6/multiprocessing/connection.py", line 368, in _send n = write(self._handle, buf) BrokenPipeError: [Errno 32] Broken pipe > KubernetesJobWatcher dies prematurely with num_runs or run_duration > ------------------------------------------------------------------- > > Key: AIRFLOW-5581 > URL: https://issues.apache.org/jira/browse/AIRFLOW-5581 > Project: Apache Airflow > Issue Type: Bug > Components: contrib > Affects Versions: 1.10.5 > Reporter: Kiran > Assignee: Kiran > Priority: Major > > If run_duration or num_runs is enabled, KubernetesJobWatcher dies prematurely > causing below error: > [2019-10-01 19:40:35,323] \{kubernetes_executor.py:327} ERROR - Unknown error > in KubernetesJobWatcher. 
Failing > Traceback (most recent call last): > File > "/home/vccorp/.local/lib/python3.6/site-packages/airflow/contrib/executors/kubernetes_executor.py", > line 325, in run > self.worker_uuid, self.kube_config) > File > "/home/vccorp/.local/lib/python3.6/site-packages/airflow/contrib/executors/kubernetes_executor.py", > line 359, in _run > task.metadata.resource_version > File > "/home/vccorp/.local/lib/python3.6/site-packages/airflow/contrib/executors/kubernetes_executor.py", > line 391, in process_status > self.watcher_queue.put((pod_id, None, labels, resource_version)) > File "<string>", line 2, in put > File "/usr/lib64/python3.6/multiprocessing/managers.py", line 756, in > _callmethod > conn.send((self._id, methodname, args, kwds)) > File "/usr/lib64/python3.6/multiprocessing/connection.py", line 206, in send > self._send_bytes(_ForkingPickler.dumps(obj)) > File "/usr/lib64/python3.6/multiprocessing/connection.py", line 404, in > _send_bytes > self._send(header + buf) > File "/usr/lib64/python3.6/multiprocessing/connection.py", line 368, in _send > n = write(self._handle, buf) > BrokenPipeError: [Errno 32] Broken pipe > Process KubernetesJobWatcher-3: > Traceback (most recent call last): > File "/usr/lib64/python3.6/multiprocessing/process.py", line 258, in > _bootstrap > self.run() > File > "/home/vccorp/.local/lib/python3.6/site-packages/airflow/contrib/executors/kubernetes_executor.py", > line 325, in run > self.worker_uuid, self.kube_config) > File > "/home/vccorp/.local/lib/python3.6/site-packages/airflow/contrib/executors/kubernetes_executor.py", > line 359, in _run > task.metadata.resource_version > File > "/home/vccorp/.local/lib/python3.6/site-packages/airflow/contrib/executors/kubernetes_executor.py", > line 391, in process_status > self.watcher_queue.put((pod_id, None, labels, resource_version)) > File "<string>", line 2, in put > File "/usr/lib64/python3.6/multiprocessing/managers.py", line 756, in > _callmethod > conn.send((self._id, methodname, 
args, kwds)) > File "/usr/lib64/python3.6/multiprocessing/connection.py", line 206, in send > self._send_bytes(_ForkingPickler.dumps(obj)) > File "/usr/lib64/python3.6/multiprocessing/connection.py", line 404, in > _send_bytes > self._send(header + buf) > File "/usr/lib64/python3.6/multiprocessing/connection.py", line 368, in _send > n = write(self._handle, buf) > BrokenPipeError: [Errno 32] Broken pipe -- This message was sent by Atlassian Jira (v8.3.4#803005)