ashb commented on a change in pull request #4751: [AIRFLOW-3607] collected
trigger rule dep check per dag run
URL: https://github.com/apache/airflow/pull/4751#discussion_r259281760
##########
File path: airflow/ti_deps/deps/trigger_rule_dep.py
##########
@@ -49,33 +49,49 @@ def _get_dep_statuses(self, ti, session, dep_context):
yield self._passing_status(reason="The task had a dummy trigger
rule set.")
return
- # TODO(unknown): this query becomes quite expensive with dags that
have many
- # tasks. It should be refactored to let the task report to the dag run
and get the
- # aggregates from there.
- qry = (
- session
- .query(
- func.coalesce(func.sum(
- case([(TI.state == State.SUCCESS, 1)], else_=0)), 0),
- func.coalesce(func.sum(
- case([(TI.state == State.SKIPPED, 1)], else_=0)), 0),
- func.coalesce(func.sum(
- case([(TI.state == State.FAILED, 1)], else_=0)), 0),
- func.coalesce(func.sum(
- case([(TI.state == State.UPSTREAM_FAILED, 1)], else_=0)),
0),
- func.count(TI.task_id),
+ successes, skipped, failed, upstream_failed, done = 0, 0, 0, 0, 0
+ if dep_context.finished_tasks is None:
+ qry = (
+ session
+ .query(
+ func.coalesce(func.sum(
+ case([(TI.state == State.SUCCESS, 1)], else_=0)), 0),
+ func.coalesce(func.sum(
+ case([(TI.state == State.SKIPPED, 1)], else_=0)), 0),
+ func.coalesce(func.sum(
+ case([(TI.state == State.FAILED, 1)], else_=0)), 0),
+ func.coalesce(func.sum(
+ case([(TI.state == State.UPSTREAM_FAILED, 1)],
else_=0)), 0),
+ func.count(TI.task_id),
Review comment:
This may be another use for State.finished() (which also makes me think
that UPSTREAM_FAILED should be included in it):
```python
.query(*[
label(state, func.coalesce(func.sum(case([(TI.state ==
state, 1)], else_=0)), 0))
for state in sorted(State.finished())
], label("done", func.count(TI.task_id)))
.filter(
TI.dag_id == ti.dag_id,
TI.task_id.in_(ti.task.upstream_task_ids),
TI.execution_date == ti.execution_date,
TI.state.in_(State.finished()),
)
counts = qry.one()
successes = counts.success
skipped = counts.skipped
failed = counts.failed
upstream_failed = counts.upstream_failed
done = counts.done
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services