This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 6c4761aa37 add a describe step on failure in eks tests (#30134)
6c4761aa37 is described below
commit 6c4761aa37812981fa186ea6cc60d74a7705fa7a
Author: Raphaël Vandon <[email protected]>
AuthorDate: Wed Mar 15 17:16:57 2023 -0700
add a describe step on failure in eks tests (#30134)
---
.../aws/example_eks_with_fargate_in_one_step.py | 21 ++++++++++++++++---
.../amazon/aws/example_eks_with_fargate_profile.py | 22 +++++++++++++++++---
.../aws/example_eks_with_nodegroup_in_one_step.py | 20 ++++++++++++++++--
.../amazon/aws/example_eks_with_nodegroups.py | 24 ++++++++++++++++++++--
4 files changed, 77 insertions(+), 10 deletions(-)
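
All four diffs apply the same pattern: the test pod is no longer deleted when it finishes, and a diagnostic task is added that only fires when something upstream has failed. A minimal sketch of that pattern, assuming a DAG that already defines cluster_name and a run_pod task, as these system tests do:

    from airflow.operators.bash import BashOperator
    from airflow.utils.trigger_rule import TriggerRule

    describe_pod = BashOperator(
        task_id="describe_pod",
        # Point kubectl at the test cluster, then dump the pod's state and events.
        bash_command=(
            f"aws eks update-kubeconfig --name {cluster_name} "
            "&& kubectl describe pod "
            "{{ ti.xcom_pull(key='pod_name', task_ids='run_pod') }}"
        ),
        # Fire only when an upstream task failed, so a green run is unaffected.
        trigger_rule=TriggerRule.ONE_FAILED,
    )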
diff --git a/tests/system/providers/amazon/aws/example_eks_with_fargate_in_one_step.py b/tests/system/providers/amazon/aws/example_eks_with_fargate_in_one_step.py
index dfd0aec8f3..37cba110d9 100644
--- a/tests/system/providers/amazon/aws/example_eks_with_fargate_in_one_step.py
+++ b/tests/system/providers/amazon/aws/example_eks_with_fargate_in_one_step.py
@@ -20,6 +20,7 @@ from datetime import datetime
from airflow.models.baseoperator import chain
from airflow.models.dag import DAG
+from airflow.operators.bash import BashOperator
from airflow.providers.amazon.aws.hooks.eks import ClusterStates, FargateProfileStates
from airflow.providers.amazon.aws.operators.eks import (
EksCreateClusterOperator,
@@ -99,8 +100,21 @@ with DAG(
labels={"demo": "hello_world"},
get_logs=True,
startup_timeout_seconds=600,
- # Delete the pod when it reaches its final state, or the execution is interrupted.
- is_delete_operator_pod=True,
+ # Keep the pod alive, so we can describe it in case of trouble. It's deleted with the cluster anyway.
+ is_delete_operator_pod=False,
+ )
+
+ describe_pod = BashOperator(
+ task_id="describe_pod",
+ bash_command=""
+ # using reinstall option so that it doesn't fail if already present
+ "install_aws.sh --reinstall " "&& install_kubectl.sh --reinstall "
+ # configure kubectl to hit the cluster created
+ f"&& aws eks update-kubeconfig --name {cluster_name} "
+ # once all this setup is done, actually describe the pod
+ "&& kubectl describe pod {{ ti.xcom_pull(key='pod_name',
task_ids='run_pod') }}",
+ # only describe the pod if the task above failed, to help diagnose
+ trigger_rule=TriggerRule.ONE_FAILED,
)
# An Amazon EKS cluster can not be deleted with attached resources such as nodegroups or Fargate profiles.
@@ -127,9 +141,10 @@ with DAG(
create_cluster_and_fargate_profile,
await_create_fargate_profile,
start_pod,
+ # TEST TEARDOWN
+ describe_pod,
delete_cluster_and_fargate_profile,
await_delete_cluster,
- # TEST TEARDOWN
)
from tests.system.utils.watcher import watcher
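
The pod name in the kubectl command is not hard-coded: bash_command is a templated field, so the {{ ti.xcom_pull(...) }} expression is rendered at runtime from the XCom value the pod operator pushed under the key 'pod_name'. The same mechanism in isolation, with placeholder task ids:

    from datetime import datetime
    from airflow.models.dag import DAG
    from airflow.operators.bash import BashOperator
    from airflow.operators.python import PythonOperator

    with DAG(dag_id="xcom_template_demo", start_date=datetime(2023, 1, 1), schedule=None):
        producer = PythonOperator(
            task_id="producer",
            # Pushing under a custom key mimics what the pod operator does with 'pod_name'.
            python_callable=lambda ti: ti.xcom_push(key="pod_name", value="demo-pod-abc123"),
        )
        consumer = BashOperator(
            task_id="consumer",
            # bash_command is a templated field; Jinja renders the pull at runtime.
            bash_command="echo got: {{ ti.xcom_pull(key='pod_name', task_ids='producer') }}",
        )
        producer >> consumer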
diff --git a/tests/system/providers/amazon/aws/example_eks_with_fargate_profile.py b/tests/system/providers/amazon/aws/example_eks_with_fargate_profile.py
index 2139871a6f..5792332136 100644
--- a/tests/system/providers/amazon/aws/example_eks_with_fargate_profile.py
+++ b/tests/system/providers/amazon/aws/example_eks_with_fargate_profile.py
@@ -20,6 +20,7 @@ from datetime import datetime
from airflow.models.baseoperator import chain
from airflow.models.dag import DAG
+from airflow.operators.bash import BashOperator
from airflow.providers.amazon.aws.hooks.eks import ClusterStates, FargateProfileStates
from airflow.providers.amazon.aws.operators.eks import (
EksCreateClusterOperator,
@@ -117,11 +118,24 @@ with DAG(
cmds=["sh", "-c", "echo Test Airflow; date"],
labels={"demo": "hello_world"},
get_logs=True,
- # Delete the pod when it reaches its final state, or the execution is interrupted.
- is_delete_operator_pod=True,
+ # Keep the pod alive, so we can describe it in case of trouble. It's deleted with the cluster anyway.
+ is_delete_operator_pod=False,
startup_timeout_seconds=200,
)
+ describe_pod = BashOperator(
+ task_id="describe_pod",
+ bash_command=""
+ # using reinstall option so that it doesn't fail if already present
+ "install_aws.sh --reinstall " "&& install_kubectl.sh --reinstall "
+ # configure kubectl to hit the cluster created
+ f"&& aws eks update-kubeconfig --name {cluster_name} "
+ # once all this setup is done, actually describe the pod
+ "&& kubectl describe pod {{ ti.xcom_pull(key='pod_name',
task_ids='run_pod') }}",
+ # only describe the pod if the task above failed, to help diagnose
+ trigger_rule=TriggerRule.ONE_FAILED,
+ )
+
# [START howto_operator_eks_delete_fargate_profile]
delete_fargate_profile = EksDeleteFargateProfileOperator(
task_id="delete_eks_fargate_profile",
@@ -162,7 +176,9 @@ with DAG(
create_fargate_profile,
await_create_fargate_profile,
start_pod,
- delete_fargate_profile,
+ # TEARDOWN
+ describe_pod,
+ delete_fargate_profile, # part of the test AND teardown
await_delete_fargate_profile,
delete_cluster,
await_delete_cluster,
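
trigger_rule=TriggerRule.ONE_FAILED makes describe_pod run as soon as at least one directly upstream task has failed, and be skipped on a clean run; the delete/await teardown tasks after it execute either way because they carry their own trigger rules (typically ALL_DONE, not shown in these hunks). A small self-contained illustration with placeholder tasks:

    from datetime import datetime
    from airflow.models.dag import DAG
    from airflow.operators.bash import BashOperator
    from airflow.utils.trigger_rule import TriggerRule

    with DAG(dag_id="trigger_rule_demo", start_date=datetime(2023, 1, 1), schedule=None):
        flaky = BashOperator(task_id="flaky", bash_command="exit 1")
        diagnose = BashOperator(
            task_id="diagnose",
            bash_command="echo collecting diagnostics",
            trigger_rule=TriggerRule.ONE_FAILED,  # runs here because 'flaky' failed
        )
        cleanup = BashOperator(
            task_id="cleanup",
            bash_command="echo tearing down",
            trigger_rule=TriggerRule.ALL_DONE,  # runs whether 'diagnose' ran or was skipped
        )
        flaky >> diagnose >> cleanup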
diff --git a/tests/system/providers/amazon/aws/example_eks_with_nodegroup_in_one_step.py b/tests/system/providers/amazon/aws/example_eks_with_nodegroup_in_one_step.py
index e565eb9f78..2a4654c15e 100644
--- a/tests/system/providers/amazon/aws/example_eks_with_nodegroup_in_one_step.py
+++ b/tests/system/providers/amazon/aws/example_eks_with_nodegroup_in_one_step.py
@@ -20,6 +20,7 @@ from datetime import datetime
from airflow.models.baseoperator import chain
from airflow.models.dag import DAG
+from airflow.operators.bash import BashOperator
from airflow.providers.amazon.aws.hooks.eks import ClusterStates, NodegroupStates
from airflow.providers.amazon.aws.operators.eks import (
EksCreateClusterOperator,
@@ -86,8 +87,21 @@ with DAG(
cmds=["sh", "-c", "echo Test Airflow; date"],
labels={"demo": "hello_world"},
get_logs=True,
- # Delete the pod when it reaches its final state, or the execution is interrupted.
- is_delete_operator_pod=True,
+ # Keep the pod alive, so we can describe it in case of trouble. It's deleted with the cluster anyway.
+ is_delete_operator_pod=False,
+ )
+
+ describe_pod = BashOperator(
+ task_id="describe_pod",
+ bash_command=""
+ # using reinstall option so that it doesn't fail if already present
+ "install_aws.sh --reinstall " "&& install_kubectl.sh --reinstall "
+ # configure kubectl to hit the cluster created
+ f"&& aws eks update-kubeconfig --name {cluster_name} "
+ # once all this setup is done, actually describe the pod
+ "&& kubectl describe pod {{ ti.xcom_pull(key='pod_name',
task_ids='run_pod') }}",
+ # only describe the pod if the task above failed, to help diagnose
+ trigger_rule=TriggerRule.ONE_FAILED,
)
# [START howto_operator_eks_force_delete_cluster]
@@ -116,6 +130,8 @@ with DAG(
create_cluster_and_nodegroup,
await_create_nodegroup,
start_pod,
+ # TEST TEARDOWN
+ describe_pod,
delete_nodegroup_and_cluster,
await_delete_cluster,
)
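
In every file the dependency list at the bottom is built with chain(), which wires the tasks into a linear sequence, so describe_pod is guaranteed to sit between the pod run and the teardown. For reference, a minimal illustration with placeholder tasks:

    from datetime import datetime
    from airflow.models.dag import DAG
    from airflow.models.baseoperator import chain
    from airflow.operators.empty import EmptyOperator

    with DAG(dag_id="chain_demo", start_date=datetime(2023, 1, 1), schedule=None):
        a, b, c = (EmptyOperator(task_id=t) for t in ("a", "b", "c"))
        chain(a, b, c)  # same dependency graph as: a >> b >> c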
diff --git a/tests/system/providers/amazon/aws/example_eks_with_nodegroups.py b/tests/system/providers/amazon/aws/example_eks_with_nodegroups.py
index bb9f9c21c9..fc8dd7daee 100644
--- a/tests/system/providers/amazon/aws/example_eks_with_nodegroups.py
+++ b/tests/system/providers/amazon/aws/example_eks_with_nodegroups.py
@@ -20,6 +20,7 @@ from datetime import datetime
from airflow.models.baseoperator import chain
from airflow.models.dag import DAG
+from airflow.operators.bash import BashOperator
from airflow.providers.amazon.aws.hooks.eks import ClusterStates, NodegroupStates
from airflow.providers.amazon.aws.operators.eks import (
EksCreateClusterOperator,
@@ -111,6 +112,23 @@ with DAG(
)
# [END howto_operator_eks_pod_operator]
+ # In this specific situation we want to keep the pod to be able to describe it,
+ # it is cleaned anyway with the cluster later on.
+ start_pod.is_delete_operator_pod = False
+
+ describe_pod = BashOperator(
+ task_id="describe_pod",
+ bash_command=""
+ # using reinstall option so that it doesn't fail if already present
+ "install_aws.sh --reinstall " "&& install_kubectl.sh --reinstall "
+ # configure kubectl to hit the cluster created
+ f"&& aws eks update-kubeconfig --name {cluster_name} "
+ # once all this setup is done, actually describe the pod
+ "&& kubectl describe pod {{ ti.xcom_pull(key='pod_name',
task_ids='run_pod') }}",
+ # only describe the pod if the task above failed, to help diagnose
+ trigger_rule=TriggerRule.ONE_FAILED,
+ )
+
# [START howto_operator_eks_delete_nodegroup]
delete_nodegroup = EksDeleteNodegroupOperator(
task_id="delete_nodegroup",
@@ -153,9 +171,11 @@ with DAG(
create_nodegroup,
await_create_nodegroup,
start_pod,
- delete_nodegroup,
+ # TEST TEARDOWN
+ describe_pod,
+ delete_nodegroup, # part of the test AND teardown
await_delete_nodegroup,
- delete_cluster,
+ delete_cluster, # part of the test AND teardown
await_delete_cluster,
)
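
This last file differs from the other three in one detail: start_pod is defined inside [START/END howto_operator_eks_pod_operator] markers that are extracted into the documentation, so rather than edit the documented snippet, the commit flips the flag after construction. Operator arguments are ordinary attributes, so a post-construction override works; a sketch of the technique, assuming the cncf.kubernetes KubernetesPodOperator that EksPodOperator extends (names and values here are placeholders):

    from airflow.providers.cncf.kubernetes.operators.kubernetes_pod import KubernetesPodOperator

    run_pod = KubernetesPodOperator(
        task_id="run_pod",
        name="describe-demo",  # placeholder pod name
        image="alpine",
        cmds=["sh", "-c", "echo Test Airflow; date"],
    )
    # Override after construction so the documented snippet above stays untouched.
    run_pod.is_delete_operator_pod = False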