syedahsn commented on code in PR #30865:
URL: https://github.com/apache/airflow/pull/30865#discussion_r1178054263
##########
airflow/providers/amazon/aws/triggers/batch.py:
##########
@@ -0,0 +1,78 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+from typing import Any
+
+from airflow.providers.amazon.aws.hooks.batch_client import BatchClientHook
+from airflow.triggers.base import BaseTrigger, TriggerEvent
+
+
+class BatchOperatorTrigger(BaseTrigger):
+    """
+    Trigger for BatchOperator.
+    The trigger will asynchronously poll the boto3 API and wait for the
+    Batch job to be in the `SUCCEEDED` state.
+
+    :param job_id: A unique identifier for the cluster.
+    :param max_retries: The maximum number of attempts to be made.
+    :param aws_conn_id: The Airflow connection used for AWS credentials.
+    :param region_name: region name to use in AWS Hook
+    :param poll_interval: The amount of time in seconds to wait between attempts.
+    """
+
+    def __init__(
+        self,
+        job_id: str | None = None,
+        max_retries: int | None = 5,
+        aws_conn_id: str | None = "aws_default",
+        region_name: str | None = None,
+        poll_interval: int = 10,
+    ):
+        super().__init__()
+        self.job_id = job_id
+        self.max_retries = max_retries
+        self.aws_conn_id = aws_conn_id
+        self.region_name = region_name
+        self.poll_interval = poll_interval
+
+    def serialize(self) -> tuple[str, dict[str, Any]]:
+        """Serializes BatchOperatorTrigger arguments and classpath."""
+        return (
+            "airflow.providers.amazon.aws.triggers.batch.BatchOperatorTrigger",
+            {
+                "job_id": self.job_id,
+                "max_retries": self.max_retries,
+                "aws_conn_id": self.aws_conn_id,
+                "region_name": self.region_name,
+                "poll_interval": self.poll_interval,
+            },
+        )
+
+    async def run(self):
+        hook = BatchClientHook(aws_conn_id=self.aws_conn_id, region_name=self.region_name)
+
+        async with hook.async_conn as client:
+            waiter = hook.get_waiter("JobComplete", deferrable=True, client=client)
+            await waiter.wait(
+                jobs=[self.job_id],
+                WaiterConfig={
+                    "delay": int(self.poll_interval),

Review Comment:
hmm that's interesting. As far as I understand it, `single_waiter_config` is taken from the `.json` file, so the spelling in the JSON file needs to be `delay`. But in the `wait` method, it `get`s the delay using
```
sleep_amount = config.get('Delay', self.config.delay)
```
where `config` is
```
config = kwargs.pop('WaiterConfig', {})
```
Just to confirm,
```
async with hook.async_conn as client:
    waiter = hook.get_waiter("JobComplete", deferrable=True, client=client)
    await waiter.wait(
        jobs=[self.job_id],
        WaiterConfig={
            "Delay": int(self.poll_interval),
            "MaxAttempts": int(self.max_retries),
        },
    )
```
this, along with the `batch.json` unchanged (i.e. `"delay": 300,`) didn't work?

I'll try and take a closer look at it when I've got a bit of time.
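To illustrate the key-casing behavior the comment describes, here is a minimal runnable sketch of the lookup pattern quoted above. It is not botocore's actual implementation; `resolve_sleep_amount` and its parameters are hypothetical names. The point it demonstrates: `wait` pops `WaiterConfig` out of the kwargs and reads the capitalized `Delay` key, so a lowercase `"delay"` key passed by the caller is silently ignored and the model default from the `.json` waiter file is used instead.

```python
# Hypothetical helper mirroring the lookup pattern quoted from botocore's
# wait method (not the real code): WaiterConfig is popped from kwargs and
# the capitalized "Delay" key is read, falling back to the model default.
def resolve_sleep_amount(model_default_delay: int, **kwargs) -> int:
    config = kwargs.pop("WaiterConfig", {})
    # Only the capitalized key overrides; any other spelling falls through
    # to the default taken from the .json waiter model.
    return config.get("Delay", model_default_delay)


# With batch.json left unchanged, "delay": 300 becomes the model default:
print(resolve_sleep_amount(300, WaiterConfig={"Delay": 10}))  # 10  -> override applied
print(resolve_sleep_amount(300, WaiterConfig={"delay": 10}))  # 300 -> lowercase key ignored
```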
