jscheffl commented on code in PR #58397:
URL: https://github.com/apache/airflow/pull/58397#discussion_r2542969269


##########
providers/cncf/kubernetes/src/airflow/providers/cncf/kubernetes/kubernetes_helper_functions.py:
##########
@@ -39,6 +42,61 @@
 POD_NAME_MAX_LENGTH = 63  # Matches Linux kernel's HOST_NAME_MAX default value minus 1.
 
 
+class PodLaunchFailedException(AirflowException):
+    """When pod launching fails in KubernetesPodOperator."""
+
+
+class KubernetesApiException(AirflowException):
+    """When communication with kubernetes API fails."""
+
+
+API_RETRIES = conf.getint("workers", "api_retries", fallback=5)
+API_RETRY_WAIT_MIN = conf.getfloat("workers", "api_retry_wait_min", fallback=1)
+API_RETRY_WAIT_MAX = conf.getfloat("workers", "api_retry_wait_max", fallback=15)
+
+_default_wait = tenacity.wait_exponential(min=API_RETRY_WAIT_MIN, max=API_RETRY_WAIT_MAX)
+
+TRANSIENT_STATUS_CODES = {409, 429, 500, 502, 503, 504}
+
+
+def _should_retry_api(exc: BaseException) -> bool:
+    """Retry on selected ApiException status codes, plus plain HTTP/timeout errors."""
+    if isinstance(exc, ApiException):
+        return exc.status in TRANSIENT_STATUS_CODES
+    return isinstance(exc, (HTTPError, KubernetesApiException))
+
+
+class WaitRetryAfterOrExponential(tenacity.wait.wait_base):
+    """Wait strategy that honors Retry-After header on 429, else falls back to exponential backoff."""
+
+    def __call__(self, retry_state):
+        exc = retry_state.outcome.exception() if retry_state.outcome else None
+        if isinstance(exc, ApiException) and exc.status == 429:
+            retry_after = (exc.headers or {}).get("Retry-After")
+            if retry_after:
+                try:
+                    return float(int(retry_after))
+                except ValueError:
+                    pass
+        # Inline exponential fallback
+        return _default_wait(retry_state)
+
+
+def generic_api_retry(func):
+    """
+    Retry to Kubernetes API calls.
+
+    - Retries only transient ApiException status codes.
+    - Honors Retry-After on 429.
+    """
+    return tenacity.retry(

Review Comment:
   Thanks!



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to