dimberman commented on a change in pull request #10996:
URL: https://github.com/apache/airflow/pull/10996#discussion_r494576691



##########
File path: airflow/executors/kubernetes_executor.py
##########
@@ -568,15 +601,14 @@ def _create_or_update_secret(secret_name, secret_path):
     def start(self) -> None:
         """Starts the executor"""
         self.log.info('Start Kubernetes executor')
-        self.worker_uuid = KubeWorkerIdentifier.get_or_create_current_kube_worker_uuid()
+        self.worker_uuid = self.job_id

Review comment:
       sure

##########
File path: airflow/executors/kubernetes_executor.py
##########
@@ -681,6 +714,78 @@ def _change_state(self,
                 self.log.debug('Could not find key: %s', str(key))
         self.event_buffer[key] = state, None
 
+    def try_adopt_task_instances(self, tis: List[TaskInstance]) -> List[TaskInstance]:
+        tis_to_flush = [ti for ti in tis if not ti.external_executor_id]
+        worker_uuids = [ti.external_executor_id for ti in tis]
+        pod_ids = {
+            create_pod_id(dag_id=ti.dag_id, task_id=ti.task_id): ti
+            for ti in tis if ti.external_executor_id
+        }
+        kube_client: client.CoreV1Api = self.kube_client
+        for worker_uuid in worker_uuids:
+            kwargs = {
+                'label_selector': f'airflow-worker={worker_uuid}'
+            }
+            pod_list = kube_client.list_namespaced_pod(
+                namespace=self.kube_config.kube_namespace,
+                **kwargs
+            )
+            for pod in pod_list.items:
+                self.adopt_launched_task(kube_client, pod, pod_ids)
+        tis_to_flush.extend(pod_ids.values())
+        if self.kube_config.delete_worker_pods:
+            self._adopt_completed_pods(kube_client)
+        return tis_to_flush
+
+    def adopt_launched_task(self, kube_client, pod, pod_ids: dict):
+        """
+        Patch existing pod so that the KubernetesJobWatcher can monitor it
+
+        :param kube_client: kubernetes client for speaking to kube API
+        :param pod: V1Pod spec that we will patch with new label
+        :param pod_ids: pod_ids we expect to patch.
+        """
+        self.log.info("attempting to adopt pod %s", pod.metadata.name)
+        pod.metadata.labels['airflow-worker'] = str(self.worker_uuid)
+        dag_id = pod.metadata.labels['dag_id']
+        task_id = pod.metadata.labels['task_id']
+        pod_id = create_pod_id(dag_id=dag_id, task_id=task_id)
+        if pod_id not in pod_ids:
+            raise AirflowException("attempting to adopt task not specified by database")
+        try:
+            kube_client.patch_namespaced_pod(
+                name=pod.metadata.name,
+                namespace=pod.metadata.namespace,
+                body=PodGenerator.serialize_pod(pod),
+            )
+            pod_ids.pop(pod_id)
+        except ApiException as e:
+            self.log.info("failed to adopt pod %s. reason: %s", pod.metadata.name, e)
+
+    def _adopt_completed_pods(self, kube_client):
+        """
+
+        Patch completed pod so that the KubernetesJobWatcher can delete it.
+        :param kube_client: kubernetes client for speaking to kube API
+        """
+        from airflow.version import version as airflow_version
+        kwargs = {
+            'field_selector': "status.phase=Succeeded",
+            'label_selector': f"airflow_version={airflow_version.replace('+', '-')}",

Review comment:
       @ashb do we want people to upgrade Airflow mid-DAG run? We'd need to test the hell out of that every time we send out a new version, so I could see it going either way.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to