Nataneljpwd commented on code in PR #61274:
URL: https://github.com/apache/airflow/pull/61274#discussion_r2778068043
##########
airflow-core/src/airflow/models/dagrun.py:
##########
@@ -1410,24 +1416,44 @@ def notify_dagrun_state_changed(self, msg: str):
# or LocalTaskJob, so we don't want to "falsely advertise" we notify
about that
@provide_session
- def get_last_ti(self, dag: SerializedDAG, session: Session = NEW_SESSION)
-> TI | None:
- """Get Last TI from the dagrun to build and pass Execution context
object from server to then run callbacks."""
+ def get_first_ti_causing_failure(self, dag: SerializedDAG, session:
Session = NEW_SESSION) -> TI | None:
+ """
+ Get the first task instance that would cause a leaf task to fail the
run.
+ """
+
tis = self.get_task_instances(session=session)
- # tis from a dagrun may not be a part of dag.partial_subset,
- # since dag.partial_subset is a subset of the dag.
- # This ensures that we will only use the accessible TI
- # context for the callback.
+
+ failed_leaf_tis = [
+ ti for ti in self._tis_for_dagrun_state(dag=dag, tis=tis)
+ if ti.state in State.failed_states
+ ]
+
+ if not failed_leaf_tis:
+ return None
+
if dag.partial:
- tis = [ti for ti in tis if not ti.state == State.NONE]
- # filter out removed tasks
- tis = natsorted(
- (ti for ti in tis if ti.state != TaskInstanceState.REMOVED),
- key=lambda ti: ti.task_id,
- )
- if not tis:
- return None
- ti = tis[-1] # get last TaskInstance of DagRun
- return ti
+ tis = [
+ ti for ti in tis if not ti.state in (
+ State.NONE, TaskInstanceState.REMOVED
+ )
+ ]
+
+ # Collect all task IDs on failure paths
+ failure_path_task_ids = set()
+ for failed_leaf in failed_leaf_tis:
+ leaf_task = dag.get_task(failed_leaf.task_id)
+ upstream_ids = leaf_task.get_flat_relative_ids(upstream=True)
+ failure_path_task_ids.update(upstream_ids)
+ failure_path_task_ids.add(failed_leaf.task_id)
+
+ # Find failed tasks on possible failure paths
+ failed_on_paths = [
+ ti for ti in tis
+ if ti.task_id in failure_path_task_ids and ti.state ==
State.FAILED
+ ]
Review Comment:
My point is that one of the last failed tasks is the one you need, and you
do not need to fetch all task instances of the DAG to find it.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]