This is an automated email from the ASF dual-hosted git repository.
jscheffl pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 204020a329d KubernetesPodOperator: add base_container_name to the
templated fields (#47864)
204020a329d is described below
commit 204020a329d954dca14ef30ea7f72c25782da85b
Author: Balthazar Rouberol <[email protected]>
AuthorDate: Sat Mar 22 23:25:33 2025 +0100
KubernetesPodOperator: add base_container_name to the templated fields
(#47864)
I would like to propose adding `base_container_name` to the
`KubernetesPodOperator` templated fields.
The rationale is that the base container name is part of the log lines
emitted by the KubernetesPodManager, which is a good opportunity to have
it give as much context as possible.
For example, in a Wikimedia DAG, we defined the following operators:
```python
class WikimediaDumpOperator(KubernetesPodOperator):
"""
Base class for all types of wiki dumps run as Kubernetes Pods.
"""
dump_type = "generic"
def __init__(self, wiki: str, *args, **kwargs):
super().__init__(*args, **kwargs)
self.wiki = wiki
# Name of the "dumps" container (default is base, which isn't super
# telling)
self.base_container_name = f"mediawiki-{self.dump_type}-dump"
# name of the pod itself
# made templated in https://github.com/apache/airflow/pull/46268
self.name = f"{self.base_container_name}-{wiki}"
class WikimediaSqlXmlDumpsOperator(WikimediaDumpOperator):
"""Operator class running the sql/xml wiki dumps as Kubernetes Pods"""
dump_type = "sql-xml"
class WikimediaWikidataDumpsOperator(WikimediaDumpOperator):
"""Operator class running the wikidata dumps as Kubernetes Pods"""
dump_type = "wikidata"
```
Adding `base_container_name` to the templated fields would allow us to
rewrite the `WikimediaDumpOperator` to the following:
```python
class WikimediaDumpOperator(KubernetesPodOperator):
"""
Base class for all types of wiki dumps run as Kubernetes Pods.
"""
dump_type = "generic"
def __init__(self, wiki: str, *args, **kwargs):
super().__init__(*args, **kwargs)
self.wiki = wiki
```
and we could invoke the operator as such:
```python
WikimediaSqlXmlDumpsOperator(
...,
base_container_name='mediawiki-{{ task.dump_type }}-dump',
name='{{ task.base_container_name }}-{{ task.wiki }}',
...
)
```
The endgame would be to have our logs contain as much context as
possible while avoiding a mix of passing keyword args to the
constructor _and_ inferring some attributes _within_ the `__init__`
method itself.
---
.../kubernetes/src/airflow/providers/cncf/kubernetes/operators/pod.py | 3 ++-
.../cncf/kubernetes/tests/unit/cncf/kubernetes/operators/test_pod.py | 2 ++
2 files changed, 4 insertions(+), 1 deletion(-)
diff --git
a/providers/cncf/kubernetes/src/airflow/providers/cncf/kubernetes/operators/pod.py
b/providers/cncf/kubernetes/src/airflow/providers/cncf/kubernetes/operators/pod.py
index cf8c581a2a6..7babcdd3eb7 100644
---
a/providers/cncf/kubernetes/src/airflow/providers/cncf/kubernetes/operators/pod.py
+++
b/providers/cncf/kubernetes/src/airflow/providers/cncf/kubernetes/operators/pod.py
@@ -213,7 +213,7 @@ class KubernetesPodOperator(BaseOperator):
:param base_container_name: The name of the base container in the pod.
This container's logs
will appear as part of this task's logs if get_logs is True. Defaults
to None. If None,
will consult the class variable BASE_CONTAINER_NAME (which defaults to
"base") for the base
- container name to use.
+ container name to use. (templated)
:param base_container_status_polling_interval: Polling period in seconds
to check for the pod base
container status. Default to 1s.
:param deferrable: Run operator in the deferrable mode.
@@ -263,6 +263,7 @@ class KubernetesPodOperator(BaseOperator):
"env_from",
"node_selector",
"kubernetes_conn_id",
+ "base_container_name",
)
template_fields_renderers = {"env_vars": "py"}
diff --git
a/providers/cncf/kubernetes/tests/unit/cncf/kubernetes/operators/test_pod.py
b/providers/cncf/kubernetes/tests/unit/cncf/kubernetes/operators/test_pod.py
index 68aea482be3..525f4a076e5 100644
--- a/providers/cncf/kubernetes/tests/unit/cncf/kubernetes/operators/test_pod.py
+++ b/providers/cncf/kubernetes/tests/unit/cncf/kubernetes/operators/test_pod.py
@@ -167,6 +167,7 @@ class TestKubernetesPodOperator:
task_id="task-id",
name="{{ dag.dag_id }}",
hostname="{{ dag.dag_id }}",
+ base_container_name="{{ dag.dag_id }}",
namespace="{{ dag.dag_id }}",
container_resources=k8s.V1ResourceRequirements(
requests={"memory": "{{ dag.dag_id }}", "cpu": "{{ dag.dag_id
}}"},
@@ -212,6 +213,7 @@ class TestKubernetesPodOperator:
assert dag_id == ti.task.cmds
assert dag_id == ti.task.name
assert dag_id == ti.task.hostname
+ assert dag_id == ti.task.base_container_name
assert dag_id == ti.task.namespace
assert dag_id == ti.task.config_file
assert dag_id == ti.task.labels