syedahsn commented on code in PR #30853:
URL: https://github.com/apache/airflow/pull/30853#discussion_r1185359710
##########
airflow/providers/amazon/aws/triggers/redshift_cluster.py:
##########
@@ -137,3 +140,70 @@ async def run(self):
},
)
yield TriggerEvent({"status": "success", "message": "Cluster Created"})
+
+
+class RedshiftPauseClusterTrigger(BaseTrigger):
+ """
+ Trigger for RedshiftPauseClusterOperator.
+ The trigger will asynchronously poll the boto3 API and wait for the
+ Redshift cluster to be in the `paused` state.
+
+ :param cluster_identifier: A unique identifier for the cluster.
+ :param poll_interval: The amount of time in seconds to wait between
attempts.
+ :param max_attempts: The maximum number of attempts to be made.
+ :param aws_conn_id: The Airflow connection used for AWS credentials.
+ """
+
+ def __init__(
+ self,
+ cluster_identifier: str,
+ poll_interval: int,
+ max_attempts: int,
+ aws_conn_id: str,
+ ):
+ self.cluster_identifier = cluster_identifier
+ self.poll_interval = poll_interval
+ self.max_attempts = max_attempts
+ self.aws_conn_id = aws_conn_id
+
+ def serialize(self) -> tuple[str, dict[str, Any]]:
+ return (
+
"airflow.providers.amazon.aws.triggers.redshift_cluster.RedshiftPauseClusterTrigger",
+ {
+ "cluster_identifier": str(self.cluster_identifier),
+ "poll_interval": str(self.poll_interval),
+ "max_attempts": str(self.max_attempts),
+ "aws_conn_id": str(self.aws_conn_id),
+ },
+ )
+
+ @cached_property
+ def hook(self) -> RedshiftHook:
+ return RedshiftHook(aws_conn_id=self.aws_conn_id)
+
+ async def run(self):
+ async with self.hook.async_conn as client:
+ attempt = 0
+ while attempt < int(self.max_attempts):
+ attempt = attempt + 1
+ try:
+ waiter = self.hook.get_waiter("cluster_paused",
deferrable=True, client=client)
+ await waiter.wait(
+ ClusterIdentifier=self.cluster_identifier,
+ WaiterConfig={
+ "Delay": int(self.poll_interval),
+ "MaxAttempts": 1,
Review Comment:
Yes unfortunately we do lose the aspect of retrying due to the logging
issue. Until boto supports logging internally, I can't see any way of logging
during the polling period. However, we still get to use the biggest benefit of
using these waiters, which is not having to write custom async code that will
make the API calls - this saves us from having to write a lot of additional
code that would, for the most part, be extremely redundant.
> With this, will it have an additional overhead of getting the waiter for
each attempt and launching it?
I hadn't noticed any performance issues with this, but that's a good point.
I moved the `get_waiter` call to outside of the while loop, so it should only
get created once now. I have added a new test as well.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]