Lee-W commented on code in PR #36916:
URL: https://github.com/apache/airflow/pull/36916#discussion_r1465773514


##########
airflow/triggers/external_task.py:
##########
@@ -36,6 +38,103 @@
     from airflow.utils.state import DagRunState
 
 
+class WorkflowTrigger(BaseTrigger):
+    """
+    A trigger to monitor tasks, task group and dag execution in Apache Airflow.
+
+    :param external_dag_id: The ID of the external DAG.
+    :param execution_dates: A list of execution dates for the external DAG.
+    :param external_task_ids: A collection of external task IDs to wait for.
+    :param external_task_group_id: The ID of the external task group to wait 
for.
+    :param failed_states: States considered as failed for external tasks.
+    :param skipped_states: States considered as skipped for external tasks.
+    :param allowed_states: States considered as successful for external tasks.
+    :param poke_interval: The interval (in seconds) for poking the external 
tasks.
+    :param soft_fail: If True, the trigger will not fail the entire DAG on 
external task failure.
+    """
+
+    def __init__(
+        self,
+        external_dag_id: str,
+        execution_dates: list,
+        external_task_ids: typing.Collection[str] | None = None,
+        external_task_group_id: str | None = None,
+        failed_states: typing.Iterable[str] | None = None,
+        skipped_states: typing.Iterable[str] | None = None,
+        allowed_states: typing.Iterable[str] | None = None,
+        poke_interval: float = 2.0,
+        soft_fail: bool = False,
+        **kwargs,
+    ):
+        self.external_dag_id = external_dag_id
+        self.external_task_ids = external_task_ids
+        self.external_task_group_id = external_task_group_id
+        self.failed_states = failed_states
+        self.skipped_states = skipped_states
+        self.allowed_states = allowed_states
+        self.execution_dates = execution_dates
+        self.poke_interval = poke_interval
+        self.soft_fail = soft_fail
+        super().__init__(**kwargs)
+
+    def serialize(self) -> tuple[str, dict[str, Any]]:
+        """Serialize the trigger param and module path."""
+        return (
+            "airflow.triggers.external_task.WorkflowTrigger",
+            {
+                "external_dag_id": self.external_dag_id,
+                "external_task_ids": self.external_task_ids,
+                "external_task_group_id": self.external_task_group_id,
+                "failed_states": self.failed_states,
+                "skipped_states": self.skipped_states,
+                "allowed_states": self.allowed_states,
+                "execution_dates": self.execution_dates,
+                "poke_interval": self.poke_interval,
+                "soft_fail": self.soft_fail,
+            },
+        )
+
+    async def run(self) -> typing.AsyncIterator[TriggerEvent]:
+        """Check periodically tasks, task group or dag status."""
+        while True:
+            if self.failed_states:
+                count_failed = _get_count(

Review Comment:
   I'm not sure, but the name `failed_count` sounds more reasonable to me.



##########
airflow/sensors/external_task.py:
##########
@@ -351,29 +348,30 @@ def execute(self, context: Context) -> None:
             super().execute(context)
         else:
             self.defer(
-                trigger=TaskStateTrigger(
-                    dag_id=self.external_dag_id,
-                    task_id=self.external_task_id,
+                timeout=self.execution_timeout,
+                trigger=WorkflowTrigger(
+                    external_dag_id=self.external_dag_id,
+                    external_task_ids=self.external_task_ids,
                     execution_dates=self._get_dttm_filter(context),
-                    states=self.allowed_states,
-                    trigger_start_time=utcnow(),
-                    poll_interval=self.poll_interval,
+                    allowed_states=self.allowed_states,
+                    poke_interval=self.poll_interval,
+                    soft_fail=self.soft_fail,
                 ),
                 method_name="execute_complete",
             )
 
     def execute_complete(self, context, event=None):
         """Execute when the trigger fires - return immediately."""
         if event["status"] == "success":
-            self.log.info("External task %s has executed successfully.", 
self.external_task_id)
-            return None
-        elif event["status"] == "timeout":
-            raise AirflowException("Dag was not started within 1 minute, 
assuming fail.")
+            self.log.info("External tasks %s has executed successfully.", 
self.external_task_ids)
         else:
-            raise AirflowException(
-                "Error occurred while trying to retrieve task status. Please, 
check the "
-                "name of executed task and Dag."
-            )
+            if self.soft_fail:
+                AirflowSkipException("External job has failed skipping.")

Review Comment:
   ```suggestion
                   raise AirflowSkipException("External job has failed 
skipping.")
   ```



##########
airflow/triggers/external_task.py:
##########
@@ -36,6 +38,103 @@
     from airflow.utils.state import DagRunState
 
 
+class WorkflowTrigger(BaseTrigger):
+    """
+    A trigger to monitor tasks, task group and dag execution in Apache Airflow.
+
+    :param external_dag_id: The ID of the external DAG.
+    :param execution_dates: A list of execution dates for the external DAG.
+    :param external_task_ids: A collection of external task IDs to wait for.
+    :param external_task_group_id: The ID of the external task group to wait 
for.
+    :param failed_states: States considered as failed for external tasks.
+    :param skipped_states: States considered as skipped for external tasks.
+    :param allowed_states: States considered as successful for external tasks.
+    :param poke_interval: The interval (in seconds) for poking the external 
tasks.
+    :param soft_fail: If True, the trigger will not fail the entire DAG on 
external task failure.
+    """
+
+    def __init__(
+        self,
+        external_dag_id: str,
+        execution_dates: list,
+        external_task_ids: typing.Collection[str] | None = None,
+        external_task_group_id: str | None = None,
+        failed_states: typing.Iterable[str] | None = None,
+        skipped_states: typing.Iterable[str] | None = None,
+        allowed_states: typing.Iterable[str] | None = None,
+        poke_interval: float = 2.0,
+        soft_fail: bool = False,
+        **kwargs,
+    ):
+        self.external_dag_id = external_dag_id
+        self.external_task_ids = external_task_ids
+        self.external_task_group_id = external_task_group_id
+        self.failed_states = failed_states
+        self.skipped_states = skipped_states
+        self.allowed_states = allowed_states
+        self.execution_dates = execution_dates
+        self.poke_interval = poke_interval
+        self.soft_fail = soft_fail
+        super().__init__(**kwargs)
+
+    def serialize(self) -> tuple[str, dict[str, Any]]:
+        """Serialize the trigger param and module path."""
+        return (
+            "airflow.triggers.external_task.WorkflowTrigger",
+            {
+                "external_dag_id": self.external_dag_id,
+                "external_task_ids": self.external_task_ids,
+                "external_task_group_id": self.external_task_group_id,
+                "failed_states": self.failed_states,
+                "skipped_states": self.skipped_states,
+                "allowed_states": self.allowed_states,
+                "execution_dates": self.execution_dates,
+                "poke_interval": self.poke_interval,
+                "soft_fail": self.soft_fail,
+            },
+        )
+
+    async def run(self) -> typing.AsyncIterator[TriggerEvent]:
+        """Check periodically tasks, task group or dag status."""
+        while True:
+            if self.failed_states:
+                count_failed = _get_count(
+                    self.execution_dates,
+                    self.external_task_ids,
+                    self.external_task_group_id,
+                    self.external_dag_id,
+                    self.failed_states,
+                )
+                if count_failed > 0:
+                    yield TriggerEvent({"status": "success"})
+                    return
+                else:
+                    yield TriggerEvent({"status": "success"})
+                    return
+            if self.skipped_states:
+                count_skipped = _get_count(
+                    self.execution_dates,
+                    self.external_task_ids,
+                    self.external_task_group_id,
+                    self.external_dag_id,
+                    self.skipped_states,
+                )
+                if count_skipped > 0:
+                    yield TriggerEvent({"status": "success"})

Review Comment:
   Should it be skipped instead?



##########
airflow/sensors/external_task.py:
##########
@@ -351,29 +348,30 @@ def execute(self, context: Context) -> None:
             super().execute(context)
         else:
             self.defer(
-                trigger=TaskStateTrigger(
-                    dag_id=self.external_dag_id,
-                    task_id=self.external_task_id,
+                timeout=self.execution_timeout,
+                trigger=WorkflowTrigger(
+                    external_dag_id=self.external_dag_id,
+                    external_task_ids=self.external_task_ids,
                     execution_dates=self._get_dttm_filter(context),
-                    states=self.allowed_states,
-                    trigger_start_time=utcnow(),
-                    poll_interval=self.poll_interval,
+                    allowed_states=self.allowed_states,
+                    poke_interval=self.poll_interval,
+                    soft_fail=self.soft_fail,
                 ),
                 method_name="execute_complete",
             )
 
     def execute_complete(self, context, event=None):
         """Execute when the trigger fires - return immediately."""
         if event["status"] == "success":
-            self.log.info("External task %s has executed successfully.", 
self.external_task_id)
-            return None
-        elif event["status"] == "timeout":
-            raise AirflowException("Dag was not started within 1 minute, 
assuming fail.")
+            self.log.info("External tasks %s has executed successfully.", 
self.external_task_ids)
         else:
-            raise AirflowException(
-                "Error occurred while trying to retrieve task status. Please, 
check the "
-                "name of executed task and Dag."
-            )
+            if self.soft_fail:

Review Comment:
   Got it. Makes sense.



##########
airflow/triggers/external_task.py:
##########
@@ -36,6 +38,103 @@
     from airflow.utils.state import DagRunState
 
 
+class WorkflowTrigger(BaseTrigger):
+    """
+    A trigger to monitor tasks, task group and dag execution in Apache Airflow.
+
+    :param external_dag_id: The ID of the external DAG.
+    :param execution_dates: A list of execution dates for the external DAG.
+    :param external_task_ids: A collection of external task IDs to wait for.
+    :param external_task_group_id: The ID of the external task group to wait 
for.
+    :param failed_states: States considered as failed for external tasks.
+    :param skipped_states: States considered as skipped for external tasks.
+    :param allowed_states: States considered as successful for external tasks.
+    :param poke_interval: The interval (in seconds) for poking the external 
tasks.
+    :param soft_fail: If True, the trigger will not fail the entire DAG on 
external task failure.
+    """
+
+    def __init__(
+        self,
+        external_dag_id: str,
+        execution_dates: list,
+        external_task_ids: typing.Collection[str] | None = None,
+        external_task_group_id: str | None = None,
+        failed_states: typing.Iterable[str] | None = None,
+        skipped_states: typing.Iterable[str] | None = None,
+        allowed_states: typing.Iterable[str] | None = None,
+        poke_interval: float = 2.0,
+        soft_fail: bool = False,
+        **kwargs,
+    ):
+        self.external_dag_id = external_dag_id
+        self.external_task_ids = external_task_ids
+        self.external_task_group_id = external_task_group_id
+        self.failed_states = failed_states
+        self.skipped_states = skipped_states
+        self.allowed_states = allowed_states
+        self.execution_dates = execution_dates
+        self.poke_interval = poke_interval
+        self.soft_fail = soft_fail
+        super().__init__(**kwargs)
+
+    def serialize(self) -> tuple[str, dict[str, Any]]:
+        """Serialize the trigger param and module path."""
+        return (
+            "airflow.triggers.external_task.WorkflowTrigger",
+            {
+                "external_dag_id": self.external_dag_id,
+                "external_task_ids": self.external_task_ids,
+                "external_task_group_id": self.external_task_group_id,
+                "failed_states": self.failed_states,
+                "skipped_states": self.skipped_states,
+                "allowed_states": self.allowed_states,
+                "execution_dates": self.execution_dates,
+                "poke_interval": self.poke_interval,
+                "soft_fail": self.soft_fail,
+            },
+        )
+
+    async def run(self) -> typing.AsyncIterator[TriggerEvent]:
+        """Check periodically tasks, task group or dag status."""
+        while True:
+            if self.failed_states:
+                count_failed = _get_count(
+                    self.execution_dates,
+                    self.external_task_ids,
+                    self.external_task_group_id,
+                    self.external_dag_id,
+                    self.failed_states,
+                )
+                if count_failed > 0:
+                    yield TriggerEvent({"status": "success"})

Review Comment:
   Should it be failed instead?



##########
airflow/utils/sensor_helper.py:
##########
@@ -0,0 +1,124 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import typing
+from typing import TYPE_CHECKING

Review Comment:
   
   ```suggestion
   from typing import TYPE_CHECKING, cast
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to