vandonr-amz commented on code in PR #31881:
URL: https://github.com/apache/airflow/pull/31881#discussion_r1228623744


##########
airflow/providers/amazon/aws/operators/ecs.py:
##########
@@ -490,7 +549,26 @@ def execute(self, context, session=None):
         if self.reattach:
             self._try_reattach_task(context)
 
-        self._start_wait_check_task(context)
+        self._start_wait_task(context)
+
+        self._after_execution(session)
+
+        if self.do_xcom_push and self.task_log_fetcher:
+            return self.task_log_fetcher.get_last_log_message()
+        else:
+            return None
+
+    def execute_complete(self, context, event=None):
+        if event["status"] != "success":
+            raise AirflowException(f"Error in task execution: {event}")
+        self.arn = event["task_arn"]  # restore arn to its updated value
+        self._after_execution()
+        if self._aws_logs_enabled():
+            ...  # TODO return last log line but task_log_fetcher will always 
be None here
+
+    @provide_session
+    def _after_execution(self, session=None):

Review Comment:
   I wanted to extract this to reuse it in `execute` and `execute_complete`, 
but I couldn't find a great name for it.



##########
airflow/providers/amazon/aws/operators/ecs.py:
##########
@@ -499,18 +577,26 @@ def execute(self, context, session=None):
             # as we can't reattach it anymore
             self._xcom_del(session, 
self.REATTACH_XCOM_TASK_ID_TEMPLATE.format(task_id=self.task_id))
 
-        if self.do_xcom_push and self.task_log_fetcher:
-            return self.task_log_fetcher.get_last_log_message()
-
-        return None
-
     @AwsBaseHook.retry(should_retry_eni)
-    def _start_wait_check_task(self, context):
+    def _start_wait_task(self, context):

Review Comment:
   the check went to `_after_execution`



##########
airflow/providers/amazon/aws/operators/ecs.py:
##########
@@ -67,6 +71,15 @@ def execute(self, context: Context):
         """Must overwrite in child classes."""
         raise NotImplementedError("Please implement execute() in subclass")
 
+    def _complete_exec_with_cluster_desc(self, context, event=None):

Review Comment:
   this callback is shared between create and delete cluster operators, so I 
put it there. It felt like a better solution than copy-pasting it for both.



##########
airflow/providers/amazon/aws/triggers/ecs.py:
##########
@@ -0,0 +1,204 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+import asyncio
+from typing import Any, AsyncIterator
+
+from botocore.exceptions import ClientError, WaiterError
+
+from airflow import AirflowException
+from airflow.providers.amazon.aws.hooks.ecs import EcsHook, EcsTaskLogFetcher
+from airflow.providers.amazon.aws.hooks.logs import AwsLogsHook
+from airflow.triggers.base import BaseTrigger, TriggerEvent
+
+
+class ClusterWaiterTrigger(BaseTrigger):
+    """
+    Polls the status of a cluster using a given waiter. Can be used to poll 
for an active or inactive cluster.
+
+    :param waiter_name: Name of the waiter to use, for instance 
'cluster_active' or 'cluster_inactive'
+    :param cluster_arn: ARN of the cluster to watch.
+    :param waiter_delay: The amount of time in seconds to wait between 
attempts.
+    :param waiter_max_attempts: The number of times to ping for status.
+        Will fail after that many unsuccessful attempts.
+    :param aws_conn_id: The Airflow connection used for AWS credentials.
+    :param region: The AWS region where the cluster is located.
+    """
+
+    def __init__(
+        self,
+        waiter_name: str,
+        cluster_arn: str,
+        waiter_delay: int | None,
+        waiter_max_attempts: int | None,
+        aws_conn_id: str | None,
+        region: str | None,
+    ):
+        self.cluster_arn = cluster_arn
+        self.waiter_name = waiter_name
+        self.waiter_delay = waiter_delay if waiter_delay is not None else 15  
# written like this to allow 0
+        self.attempts = waiter_max_attempts or 999999999
+        self.aws_conn_id = aws_conn_id
+        self.region = region
+
+    def serialize(self) -> tuple[str, dict[str, Any]]:
+        return (
+            self.__class__.__module__ + "." + self.__class__.__qualname__,
+            {
+                "waiter_name": self.waiter_name,
+                "cluster_arn": self.cluster_arn,
+                "waiter_delay": self.waiter_delay,
+                "waiter_max_attempts": self.attempts,
+                "aws_conn_id": self.aws_conn_id,
+                "region": self.region,
+            },
+        )
+
+    async def run(self) -> AsyncIterator[TriggerEvent]:
+        async with EcsHook(aws_conn_id=self.aws_conn_id, 
region_name=self.region).async_conn as client:
+            waiter = client.get_waiter(self.waiter_name)
+            while self.attempts >= 1:
+                self.attempts = self.attempts - 1
+                try:
+                    await waiter.wait(
+                        clusters=[self.cluster_arn],
+                        WaiterConfig={
+                            "MaxAttempts": 1,
+                        },
+                    )
+                    # we reach this point only if the waiter met a success 
criteria
+                    yield TriggerEvent({"status": "success", "arn": 
self.cluster_arn})
+                    return
+                except WaiterError as error:
+                    if "terminal failure" in str(error):
+                        raise
+                    self.log.info("Status of cluster is %s", 
error.last_response["clusters"][0]["status"])
+                    await asyncio.sleep(int(self.waiter_delay))
+
+        raise AirflowException(
+            "Cluster still not in expected status after the max number of 
tries has been reached"
+        )
+
+
+class TaskDoneTrigger(BaseTrigger):
+    """
+    Waits for an ECS task to be done, while eventually polling logs.
+
+    :param cluster: short name or full ARN of the cluster where the task is 
running.
+    :param task_arn: ARN of the task to watch.
+    :param waiter_delay: The amount of time in seconds to wait between 
attempts.
+    :param waiter_max_attempts: The number of times to ping for status.
+        Will fail after that many unsuccessful attempts.
+    :param aws_conn_id: The Airflow connection used for AWS credentials.
+    :param region: The AWS region where the cluster is located.
+    """
+
+    def __init__(
+        self,
+        cluster: str,
+        task_arn: str,
+        waiter_delay: int | None,
+        aws_conn_id: str | None,
+        region: str | None,
+        log_group: str | None = None,
+        log_stream: str | None = None,
+    ):
+        self.cluster = cluster
+        self.task_arn = task_arn
+
+        self.waiter_delay = waiter_delay or 15
+        self.aws_conn_id = aws_conn_id
+        self.region = region
+
+        self.log_group = log_group
+        self.log_stream = log_stream
+
+    def serialize(self) -> tuple[str, dict[str, Any]]:
+        return (
+            self.__class__.__module__ + "." + self.__class__.__qualname__,
+            {
+                "cluster": self.cluster,
+                "task_arn": self.task_arn,
+                "waiter_delay": self.waiter_delay,
+                "aws_conn_id": self.aws_conn_id,
+                "region": self.region,
+                "log_group": self.log_group,
+                "log_stream": self.log_stream,
+            },
+        )
+
+    async def run(self) -> AsyncIterator[TriggerEvent]:
+        # fmt: off
+        async with EcsHook(aws_conn_id=self.aws_conn_id, 
region_name=self.region).async_conn as ecs_client,\
+                AwsLogsHook(aws_conn_id=self.aws_conn_id, 
region_name=self.region).async_conn as logs_client:
+            # fmt: on
+            waiter = ecs_client.get_waiter("tasks_stopped")
+            logs_token = None
+            while True:
+                try:
+                    await waiter.wait(
+                        cluster=self.cluster, tasks=[self.task_arn], 
WaiterConfig={"MaxAttempts": 1}
+                    )
+                    break  # we reach this point only if the waiter met a 
success criteria
+                except WaiterError as error:
+                    if "terminal failure" in str(error):
+                        raise
+                    self.log.info("Status of the task is %s", 
error.last_response["tasks"][0]["lastStatus"])
+                    await asyncio.sleep(int(self.waiter_delay))
+                finally:
+                    if self.log_group and self.log_stream:
+                        logs_token = await self._forward_logs(logs_client, 
logs_token)
+
+        yield TriggerEvent({"status": "success", "task_arn": self.task_arn})
+
+    async def _forward_logs(self, logs_client, next_token: str | None = None) 
-> str | None:

Review Comment:
   this is code that is _very much inspired_ by 
https://github.com/apache/airflow/blob/main/airflow/providers/amazon/aws/hooks/logs.py#L53
 but since I need to use an async call in the middle, refactoring the existing 
code to allow that seemed like it would add a lot of complexity



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to