amoghrajesh commented on code in PR #68067:
URL: https://github.com/apache/airflow/pull/68067#discussion_r3393330009
##########
providers/apache/spark/src/airflow/providers/apache/spark/operators/spark_submit.py:
##########
@@ -321,12 +346,31 @@ def get_job_status(self, external_id: JsonValue, context:
Context) -> str:
if self._hook._is_yarn_cluster_mode:
return self._hook.query_yarn_application_status(external_id)
if self._hook._is_kubernetes:
- # The K8s branches below (and in is_job_active, is_job_succeeded,
poll_until_complete)
- # are currently unreachable: execute_resumable is only called when
_should_track_driver_status
- # is True, which requires spark:// + cluster mode. They are
scaffolding for a follow-up PR
- # that extends ResumableJobMixin support to Kubernetes.
- # TODO: call K8s pod status API
- raise NotImplementedError("K8s job status not yet implemented")
+ task_store = context.get("task_store")
+ if task_store is not None:
+ cached = task_store.get(self._K8S_DRIVER_STATUS_KEY)
+ if cached:
+ if TYPE_CHECKING:
+ assert isinstance(cached, str)
+ return cached
+ if kube_client is None:
+ raise RuntimeError(
+ "apache-airflow-providers-cncf-kubernetes is required to
query K8s pod status"
+ )
+ parts = external_id.split(":", 1)
+ if len(parts) != 2:
+ raise ValueError(
+ f"Invalid K8s external ID format {external_id!r}; expected
'namespace:pod_name'"
+ )
+ namespace, pod_name = parts
+ try:
+ client = kube_client.get_kube_client()
+ pod = client.read_namespaced_pod(pod_name, namespace)
+ return pod.status.phase
Review Comment:
On a related note, the hook's poll loop already guards this with `phase =
pod.status.phase or "Initializing"`, but `get_job_status` was unguarded. Fixed
as suggested, which maps to "still active, keep polling". Added a test for the
`None` phase case.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]