This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 6c4761aa37 add a describe step on failure in eks tests (#30134)
6c4761aa37 is described below
commit 6c4761aa37812981fa186ea6cc60d74a7705fa7a
Author: Raphaël Vandon <[email protected]>
AuthorDate: Wed Mar 15 17:16:57 2023 -0700
add a describe step on failure in eks tests (#30134)
---
.../aws/example_eks_with_fargate_in_one_step.py | 21 ++++++++++++++++---
.../amazon/aws/example_eks_with_fargate_profile.py | 22 +++++++++++++++++---
.../aws/example_eks_with_nodegroup_in_one_step.py | 20 ++++++++++++++++--
.../amazon/aws/example_eks_with_nodegroups.py | 24 ++++++++++++++++++++--
4 files changed, 77 insertions(+), 10 deletions(-)
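
All four diffs apply the same pattern: the test pod is no longer deleted when it finishes, and a diagnostic task is added that only fires when something upstream has failed. A minimal sketch of that pattern, assuming a DAG that already defines cluster_name and a run_pod task, as these system tests do:

    from airflow.operators.bash import BashOperator
    from airflow.utils.trigger_rule import TriggerRule

    describe_pod = BashOperator(
        task_id="describe_pod",
        # Point kubectl at the test cluster, then dump the pod's state and events.
        bash_command=(
            f"aws eks update-kubeconfig --name {cluster_name} "
            "&& kubectl describe pod "
            "{{ ti.xcom_pull(key='pod_name', task_ids='run_pod') }}"
        ),
        # Fire only when an upstream task failed, so a green run is unaffected.
        trigger_rule=TriggerRule.ONE_FAILED,
    )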
diff --git a/tests/system/providers/amazon/aws/example_eks_with_fargate_in_one_step.py b/tests/system/providers/amazon/aws/example_eks_with_fargate_in_one_step.py
index dfd0aec8f3..37cba110d9 100644
--- a/tests/system/providers/amazon/aws/example_eks_with_fargate_in_one_step.py
+++ b/tests/system/providers/amazon/aws/example_eks_with_fargate_in_one_step.py
@@ -20,6 +20,7 @@ from datetime import datetime
from airflow.models.baseoperator import chain
from airflow.models.dag import DAG
+from airflow.operators.bash import BashOperator
from airflow.providers.amazon.aws.hooks.eks import ClusterStates, FargateProfileStates
from airflow.providers.amazon.aws.operators.eks import (
EksCreateClusterOperator,
@@ -99,8 +100,21 @@ with DAG(
labels={"demo": "hello_world"},
get_logs=True,
startup_timeout_seconds=600,
- # Delete the pod when it reaches its final state, or the execution is interrupted.
- is_delete_operator_pod=True,
+ # Keep the pod alive, so we can describe it in case of trouble. It's deleted with the cluster anyway.
+ is_delete_operator_pod=False,
+ )
+
+ describe_pod = BashOperator(
+ task_id="describe_pod",
+ bash_command=""
+ # using reinstall option so that it doesn't fail if already present
+ "install_aws.sh --reinstall " "&& install_kubectl.sh --reinstall "
+ # configure kubectl to hit the cluster created
+ f"&& aws eks update-kubeconfig --name {cluster_name} "
+ # once all this setup is done, actually describe the pod
+ "&& kubectl describe pod {{ ti.xcom_pull(key='pod_name',
task_ids='run_pod') }}",
+ # only describe the pod if the task above failed, to help diagnose
+ trigger_rule=TriggerRule.ONE_FAILED,
)
# An Amazon EKS cluster can not be deleted with attached resources such as nodegroups or Fargate profiles.
@@ -127,9 +141,10 @@ with DAG(
create_cluster_and_fargate_profile,
await_create_fargate_profile,
start_pod,
+ # TEST TEARDOWN
+ describe_pod,
delete_cluster_and_fargate_profile,
await_delete_cluster,
- # TEST TEARDOWN
)
from tests.system.utils.watcher import watcher
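
The pod name in the kubectl command is not hard-coded: bash_command is a templated field, so the {{ ti.xcom_pull(...) }} expression is rendered at runtime from the XCom value the pod operator pushed under the key 'pod_name'. The same mechanism in isolation, with placeholder task ids:

    from datetime import datetime
    from airflow.models.dag import DAG
    from airflow.operators.bash import BashOperator
    from airflow.operators.python import PythonOperator

    with DAG(dag_id="xcom_template_demo", start_date=datetime(2023, 1, 1), schedule=None):
        producer = PythonOperator(
            task_id="producer",
            # Pushing under a custom key mimics what the pod operator does with 'pod_name'.
            python_callable=lambda ti: ti.xcom_push(key="pod_name", value="demo-pod-abc123"),
        )
        consumer = BashOperator(
            task_id="consumer",
            # bash_command is a templated field; Jinja renders the pull at runtime.
            bash_command="echo got: {{ ti.xcom_pull(key='pod_name', task_ids='producer') }}",
        )
        producer >> consumer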
diff --git a/tests/system/providers/amazon/aws/example_eks_with_fargate_profile.py b/tests/system/providers/amazon/aws/example_eks_with_fargate_profile.py
index 2139871a6f..5792332136 100644
--- a/tests/system/providers/amazon/aws/example_eks_with_fargate_profile.py
+++ b/tests/system/providers/amazon/aws/example_eks_with_fargate_profile.py
@@ -20,6 +20,7 @@ from datetime import datetime
from airflow.models.baseoperator import chain
from airflow.models.dag import DAG
+from airflow.operators.bash import BashOperator
from airflow.providers.amazon.aws.hooks.eks import ClusterStates, FargateProfileStates
from airflow.providers.amazon.aws.operators.eks import (
EksCreateClusterOperator,
@@ -117,11 +118,24 @@ with DAG(
cmds=["sh", "-c", "echo Test Airflow; date"],
labels={"demo": "hello_world"},
get_logs=True,
- # Delete the pod when it reaches its final state, or the execution is interrupted.
- is_delete_operator_pod=True,
+ # Keep the pod alive, so we can describe it in case of trouble. It's deleted with the cluster anyway.
+ is_delete_operator_pod=False,
startup_timeout_seconds=200,
)
+ describe_pod = BashOperator(
+ task_id="describe_pod",
+ bash_command=""
+ # using reinstall option so that it doesn't fail if already present
+ "install_aws.sh --reinstall " "&& install_kubectl.sh --reinstall "
+ # configure kubectl to hit the cluster created
+ f"&& aws eks update-kubeconfig --name {cluster_name} "
+ # once all this setup is done, actually describe the pod
+ "&& kubectl describe pod {{ ti.xcom_pull(key='pod_name',
task_ids='run_pod') }}",
+ # only describe the pod if the task above failed, to help diagnose
+ trigger_rule=TriggerRule.ONE_FAILED,
+ )
+
# [START howto_operator_eks_delete_fargate_profile]
delete_fargate_profile = EksDeleteFargateProfileOperator(
task_id="delete_eks_fargate_profile",
@@ -162,7 +176,9 @@ with DAG(
create_fargate_profile,
await_create_fargate_profile,
start_pod,
- delete_fargate_profile,
+ # TEARDOWN
+ describe_pod,
+ delete_fargate_profile, # part of the test AND teardown
await_delete_fargate_profile,
delete_cluster,
await_delete_cluster,
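
trigger_rule=TriggerRule.ONE_FAILED makes describe_pod run as soon as at least one directly upstream task has failed, and be skipped on a clean run; the delete/await teardown tasks after it execute either way because they carry their own trigger rules (typically ALL_DONE, not shown in these hunks). A small self-contained illustration with placeholder tasks:

    from datetime import datetime
    from airflow.models.dag import DAG
    from airflow.operators.bash import BashOperator
    from airflow.utils.trigger_rule import TriggerRule

    with DAG(dag_id="trigger_rule_demo", start_date=datetime(2023, 1, 1), schedule=None):
        flaky = BashOperator(task_id="flaky", bash_command="exit 1")
        diagnose = BashOperator(
            task_id="diagnose",
            bash_command="echo collecting diagnostics",
            trigger_rule=TriggerRule.ONE_FAILED,  # runs here because 'flaky' failed
        )
        cleanup = BashOperator(
            task_id="cleanup",
            bash_command="echo tearing down",
            trigger_rule=TriggerRule.ALL_DONE,  # runs whether 'diagnose' ran or was skipped
        )
        flaky >> diagnose >> cleanup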
diff --git a/tests/system/providers/amazon/aws/example_eks_with_nodegroup_in_one_step.py b/tests/system/providers/amazon/aws/example_eks_with_nodegroup_in_one_step.py
index e565eb9f78..2a4654c15e 100644
--- a/tests/system/providers/amazon/aws/example_eks_with_nodegroup_in_one_step.py
+++ b/tests/system/providers/amazon/aws/example_eks_with_nodegroup_in_one_step.py
@@ -20,6 +20,7 @@ from datetime import datetime
from airflow.models.baseoperator import chain
from airflow.models.dag import DAG
+from airflow.operators.bash import BashOperator
from airflow.providers.amazon.aws.hooks.eks import ClusterStates, NodegroupStates
from airflow.providers.amazon.aws.operators.eks import (
EksCreateClusterOperator,
@@ -86,8 +87,21 @@ with DAG(
cmds=["sh", "-c", "echo Test Airflow; date"],
labels={"demo": "hello_world"},
get_logs=True,
- # Delete the pod when it reaches its final state, or the execution is interrupted.
- is_delete_operator_pod=True,
+ # Keep the pod alive, so we can describe it in case of trouble. It's deleted with the cluster anyway.
+ is_delete_operator_pod=False,
+ )
+
+ describe_pod = BashOperator(
+ task_id="describe_pod",
+ bash_command=""
+ # using reinstall option so that it doesn't fail if already present
+ "install_aws.sh --reinstall " "&& install_kubectl.sh --reinstall "
+ # configure kubectl to hit the cluster created
+ f"&& aws eks update-kubeconfig --name {cluster_name} "
+ # once all this setup is done, actually describe the pod
+ "&& kubectl describe pod {{ ti.xcom_pull(key='pod_name',
task_ids='run_pod') }}",
+ # only describe the pod if the task above failed, to help diagnose
+ trigger_rule=TriggerRule.ONE_FAILED,
)
# [START howto_operator_eks_force_delete_cluster]
@@ -116,6 +130,8 @@ with DAG(
create_cluster_and_nodegroup,
await_create_nodegroup,
start_pod,
+ # TEST TEARDOWN
+ describe_pod,
delete_nodegroup_and_cluster,
await_delete_cluster,
)
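
In every file the dependency list at the bottom is built with chain(), which wires the tasks into a linear sequence, so describe_pod is guaranteed to sit between the pod run and the teardown. For reference, a minimal illustration with placeholder tasks:

    from datetime import datetime
    from airflow.models.dag import DAG
    from airflow.models.baseoperator import chain
    from airflow.operators.empty import EmptyOperator

    with DAG(dag_id="chain_demo", start_date=datetime(2023, 1, 1), schedule=None):
        a, b, c = (EmptyOperator(task_id=t) for t in ("a", "b", "c"))
        chain(a, b, c)  # same dependency graph as: a >> b >> c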
diff --git a/tests/system/providers/amazon/aws/example_eks_with_nodegroups.py b/tests/system/providers/amazon/aws/example_eks_with_nodegroups.py
index bb9f9c21c9..fc8dd7daee 100644
--- a/tests/system/providers/amazon/aws/example_eks_with_nodegroups.py
+++ b/tests/system/providers/amazon/aws/example_eks_with_nodegroups.py
@@ -20,6 +20,7 @@ from datetime import datetime
from airflow.models.baseoperator import chain
from airflow.models.dag import DAG
+from airflow.operators.bash import BashOperator
from airflow.providers.amazon.aws.hooks.eks import ClusterStates, NodegroupStates
from airflow.providers.amazon.aws.operators.eks import (
EksCreateClusterOperator,
@@ -111,6 +112,23 @@ with DAG(
)
# [END howto_operator_eks_pod_operator]
+ # In this specific situation we want to keep the pod to be able to describe it,
+ # it is cleaned anyway with the cluster later on.
+ start_pod.is_delete_operator_pod = False
+
+ describe_pod = BashOperator(
+ task_id="describe_pod",
+ bash_command=""
+ # using reinstall option so that it doesn't fail if already present
+ "install_aws.sh --reinstall " "&& install_kubectl.sh --reinstall "
+ # configure kubectl to hit the cluster created
+ f"&& aws eks update-kubeconfig --name {cluster_name} "
+ # once all this setup is done, actually describe the pod
+ "&& kubectl describe pod {{ ti.xcom_pull(key='pod_name',
task_ids='run_pod') }}",
+ # only describe the pod if the task above failed, to help diagnose
+ trigger_rule=TriggerRule.ONE_FAILED,
+ )
+
# [START howto_operator_eks_delete_nodegroup]
delete_nodegroup = EksDeleteNodegroupOperator(
task_id="delete_nodegroup",
@@ -153,9 +171,11 @@ with DAG(
create_nodegroup,
await_create_nodegroup,
start_pod,
- delete_nodegroup,
+ # TEST TEARDOWN
+ describe_pod,
+ delete_nodegroup, # part of the test AND teardown
await_delete_nodegroup,
- delete_cluster,
+ delete_cluster, # part of the test AND teardown
await_delete_cluster,
)
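
This last file differs from the other three in one detail: start_pod is defined inside [START/END howto_operator_eks_pod_operator] markers that are extracted into the documentation, so rather than edit the documented snippet, the commit flips the flag after construction. Operator arguments are ordinary attributes, so a post-construction override works; a sketch of the technique, assuming the cncf.kubernetes KubernetesPodOperator that EksPodOperator extends (names and values here are placeholders):

    from airflow.providers.cncf.kubernetes.operators.kubernetes_pod import KubernetesPodOperator

    run_pod = KubernetesPodOperator(
        task_id="run_pod",
        name="describe-demo",  # placeholder pod name
        image="alpine",
        cmds=["sh", "-c", "echo Test Airflow; date"],
    )
    # Override after construction so the documented snippet above stays untouched.
    run_pod.is_delete_operator_pod = False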