hongshaoyang commented on code in PR #30727:
URL: https://github.com/apache/airflow/pull/30727#discussion_r1859765051


##########
airflow/executors/kubernetes_executor_utils.py:
##########
@@ -0,0 +1,477 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import json
+import multiprocessing
+import time
+from queue import Empty, Queue
+from typing import TYPE_CHECKING, Any
+
+from kubernetes import client, watch
+from kubernetes.client import Configuration, models as k8s
+from kubernetes.client.rest import ApiException
+from urllib3.exceptions import ReadTimeoutError
+
+from airflow.exceptions import AirflowException
+from airflow.kubernetes.kube_client import get_kube_client
+from airflow.kubernetes.kubernetes_helper_functions import (
+    annotations_for_logging_task_metadata,
+    annotations_to_key,
+    create_pod_id,
+)
+from airflow.kubernetes.pod_generator import PodGenerator
+from airflow.utils.log.logging_mixin import LoggingMixin
+from airflow.utils.singleton import Singleton
+from airflow.utils.state import State
+
+if TYPE_CHECKING:
+    from airflow.executors.kubernetes_executor_types import (
+        KubernetesJobType,
+        KubernetesResultsType,
+        KubernetesWatchType,
+    )
+
+
+from airflow.executors.kubernetes_executor_types import ALL_NAMESPACES, POD_EXECUTOR_DONE_KEY
+
+
+class ResourceVersion(metaclass=Singleton):
+    """Singleton for tracking resourceVersion from Kubernetes."""
+
+    resource_version: dict[str, str] = {}
+
+
+class KubernetesJobWatcher(multiprocessing.Process, LoggingMixin):
+    """Watches for Kubernetes jobs."""
+
+    def __init__(
+        self,
+        namespace: str,
+        watcher_queue: Queue[KubernetesWatchType],
+        resource_version: str | None,
+        scheduler_job_id: str,
+        kube_config: Configuration,
+    ):
+        super().__init__()
+        self.namespace = namespace
+        self.scheduler_job_id = scheduler_job_id
+        self.watcher_queue = watcher_queue
+        self.resource_version = resource_version
+        self.kube_config = kube_config
+
+    def run(self) -> None:
+        """Performs watching."""
+        if TYPE_CHECKING:
+            assert self.scheduler_job_id
+
+        kube_client: client.CoreV1Api = get_kube_client()
+        while True:
+            try:
+                self.resource_version = self._run(
+                    kube_client, self.resource_version, self.scheduler_job_id, self.kube_config
+                )
+            except ReadTimeoutError:
+                self.log.warning(
+                    "There was a timeout error accessing the Kube API. Retrying request.", exc_info=True
+                )
+                time.sleep(1)
+            except Exception:
+                self.log.exception("Unknown error in KubernetesJobWatcher. Failing")
+                self.resource_version = "0"
+                ResourceVersion().resource_version[self.namespace] = "0"
+                raise
+            else:
+                self.log.warning(
+                    "Watch died gracefully, starting back up with: last resource_version: %s",
+                    self.resource_version,
+                )
+
+    def _pod_events(self, kube_client: client.CoreV1Api, query_kwargs: dict):
+        watcher = watch.Watch()
+        try:
+            if self.namespace == ALL_NAMESPACES:
+                return watcher.stream(kube_client.list_pod_for_all_namespaces, **query_kwargs)
+            else:
+                return watcher.stream(kube_client.list_namespaced_pod, self.namespace, **query_kwargs)
+        except ApiException as e:
+            if e.status == 410:  # Resource version is too old
+                if self.namespace == ALL_NAMESPACES:
+                    pods = kube_client.list_pod_for_all_namespaces(watch=False)
+                else:
+                    pods = kube_client.list_namespaced_pod(namespace=self.namespace, watch=False)
+                resource_version = pods.metadata.resource_version
+                query_kwargs["resource_version"] = resource_version
+                return self._pod_events(kube_client=kube_client, query_kwargs=query_kwargs)
+            else:
+                raise
+
+    def _run(
+        self,
+        kube_client: client.CoreV1Api,
+        resource_version: str | None,
+        scheduler_job_id: str,
+        kube_config: Any,
+    ) -> str | None:
+        self.log.info("Event: and now my watch begins starting at resource_version: %s", resource_version)

Review Comment:
   I love this reference :)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to