This is an automated email from the ASF dual-hosted git repository.

jscheffl pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 204020a329d KubernetesPodOperator: add base_container_name to the 
templated fields (#47864)
204020a329d is described below

commit 204020a329d954dca14ef30ea7f72c25782da85b
Author: Balthazar Rouberol <[email protected]>
AuthorDate: Sat Mar 22 23:25:33 2025 +0100

    KubernetesPodOperator: add base_container_name to the templated fields 
(#47864)
    
    I would like to propose adding `base_container_name` to the
    `KubernetesPodOperator` templated fields.
    
    The rationale is that the base container name is part of the log lines
    emitted by the KubernetesPodManager, which is a good opportunity to have
    it give as much context as possible.
    
    For example, in a Wikimedia DAG, we defined the following operators:
    
    ```python
    class WikimediaDumpOperator(KubernetesPodOperator):
        """
        Base class for all types of wiki dumps run as Kubernetes Pods.
        """
    
        dump_type = "generic"
    
        def __init__(self, wiki: str, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.wiki = wiki
    
            # Name of the "dumps" container (default is base, which isn't super 
telling)
            self.base_container_name = f"mediawiki-{self.dump_type}-dump"
    
            # name of the pod itself
            # made templated in https://github.com/apache/airflow/pull/46268
            self.name = f"{self.base_container_name}-{wiki}"
    
    class WikimediaSqlXmlDumpsOperator(WikimediaDumpOperator):
        """Operator class running the sql/xml wiki dumps as Kubernetes Pods"""
    
        dump_type = "sql-xml"
    
    class WikimediaWikidataDumpsOperator(WikimediaDumpOperator):
        """Operator class running the wikidata dumps as Kubernetes Pods"""
    
        dump_type = "wikidata"
    
    ```
    
    Adding `base_container_name` to the templated fields would allow us to
    rewrite the `WikimediaDumpOperator` to the following:
    
    ```python
    class WikimediaDumpOperator(KubernetesPodOperator):
        """
        Base class for all types of wiki dumps run as Kubernetes Pods.
        """
    
        dump_type = "generic"
    
        def __init__(self, wiki: str, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.wiki = wiki
    ```
    
    and we could invoke the operator as such:
    
    ```python
    WikimediaSqlXmlDumpsOperator(
        ...,
        base_container_name='mediawiki-{{ task.dump_type }}-dump',
        name='{{ task.base_container_name }}-{{ task.wiki }}',
        ...
    )
    ```
    
    The endgame would be to have our logs contain as much context as
    possible while avoiding a mix of passing keyword args to the
    constructor _and_ inferring some attributes _within_ the `__init__`
    method itself.
---
 .../kubernetes/src/airflow/providers/cncf/kubernetes/operators/pod.py  | 3 ++-
 .../cncf/kubernetes/tests/unit/cncf/kubernetes/operators/test_pod.py   | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git 
a/providers/cncf/kubernetes/src/airflow/providers/cncf/kubernetes/operators/pod.py
 
b/providers/cncf/kubernetes/src/airflow/providers/cncf/kubernetes/operators/pod.py
index cf8c581a2a6..7babcdd3eb7 100644
--- 
a/providers/cncf/kubernetes/src/airflow/providers/cncf/kubernetes/operators/pod.py
+++ 
b/providers/cncf/kubernetes/src/airflow/providers/cncf/kubernetes/operators/pod.py
@@ -213,7 +213,7 @@ class KubernetesPodOperator(BaseOperator):
     :param base_container_name: The name of the base container in the pod. 
This container's logs
         will appear as part of this task's logs if get_logs is True. Defaults 
to None. If None,
         will consult the class variable BASE_CONTAINER_NAME (which defaults to 
"base") for the base
-        container name to use.
+        container name to use. (templated)
     :param base_container_status_polling_interval: Polling period in seconds 
to check for the pod base
         container status. Default to 1s.
     :param deferrable: Run operator in the deferrable mode.
@@ -263,6 +263,7 @@ class KubernetesPodOperator(BaseOperator):
         "env_from",
         "node_selector",
         "kubernetes_conn_id",
+        "base_container_name",
     )
     template_fields_renderers = {"env_vars": "py"}
 
diff --git 
a/providers/cncf/kubernetes/tests/unit/cncf/kubernetes/operators/test_pod.py 
b/providers/cncf/kubernetes/tests/unit/cncf/kubernetes/operators/test_pod.py
index 68aea482be3..525f4a076e5 100644
--- a/providers/cncf/kubernetes/tests/unit/cncf/kubernetes/operators/test_pod.py
+++ b/providers/cncf/kubernetes/tests/unit/cncf/kubernetes/operators/test_pod.py
@@ -167,6 +167,7 @@ class TestKubernetesPodOperator:
             task_id="task-id",
             name="{{ dag.dag_id }}",
             hostname="{{ dag.dag_id }}",
+            base_container_name="{{ dag.dag_id }}",
             namespace="{{ dag.dag_id }}",
             container_resources=k8s.V1ResourceRequirements(
                 requests={"memory": "{{ dag.dag_id }}", "cpu": "{{ dag.dag_id 
}}"},
@@ -212,6 +213,7 @@ class TestKubernetesPodOperator:
         assert dag_id == ti.task.cmds
         assert dag_id == ti.task.name
         assert dag_id == ti.task.hostname
+        assert dag_id == ti.task.base_container_name
         assert dag_id == ti.task.namespace
         assert dag_id == ti.task.config_file
         assert dag_id == ti.task.labels

Reply via email to