pierrejeambrun commented on code in PR #50984:
URL: https://github.com/apache/airflow/pull/50984#discussion_r2106984039
##########
airflow-core/src/airflow/api_fastapi/core_api/routes/public/dags.py:
##########
@@ -115,30 +116,54 @@ def get_dags(
session: SessionDep,
) -> DAGCollectionResponse:
"""Get all DAGs."""
- dag_runs_select = None
+ query = select(DagModel)
- if dag_run_state.value or dag_run_start_date_range.is_active() or
dag_run_end_date_range.is_active():
- dag_runs_select, _ = paginated_select(
- statement=select(DagRun),
+ max_run_id_query = ( # ordering by id will not always be "latest run",
but it's a simplifying assumption
+ select(DagRun.dag_id, func.max(DagRun.id).label("max_dag_run_id"))
Review Comment:
We shouldn't have to do this because it can yield wrong results. We are
capable of emitting a query that selects one run per `dag_id`: the one that
has the max `start_date`. In case of multiple rows for the same `dag_id` with
the same max `start_date`, it will then choose the single row with the
`max_dag_run_id` as a second, tie-breaking criterion.
##########
airflow-core/src/airflow/api_fastapi/core_api/routes/public/dags.py:
##########
@@ -115,30 +116,54 @@ def get_dags(
session: SessionDep,
) -> DAGCollectionResponse:
"""Get all DAGs."""
- dag_runs_select = None
+ query = select(DagModel)
- if dag_run_state.value or dag_run_start_date_range.is_active() or
dag_run_end_date_range.is_active():
- dag_runs_select, _ = paginated_select(
- statement=select(DagRun),
+ max_run_id_query = ( # ordering by id will not always be "latest run",
but it's a simplifying assumption
+ select(DagRun.dag_id, func.max(DagRun.id).label("max_dag_run_id"))
+ .where(DagRun.start_date.is_not(null()))
+ .group_by(DagRun.dag_id)
+ .subquery(name="mrq")
+ )
+
+ has_max_run_filter = (
+ dag_run_state.value
+ or last_dag_run_state.value
+ or dag_run_start_date_range.is_active()
+ or dag_run_end_date_range.is_active()
+ )
+
+ if has_max_run_filter or order_by.value in (
+ "last_run_state",
+ "last_run_start_date",
+ "-last_run_state",
+ "-last_run_start_date",
+ ):
+ query = query.join(
+ max_run_id_query,
+ DagModel.dag_id == max_run_id_query.c.dag_id,
+ isouter=True,
+ ).join(DagRun, DagRun.id == max_run_id_query.c.max_dag_run_id,
isouter=True)
Review Comment:
I mean: always do the join, without a condition.
But disregard my comment; that's not important.
##########
airflow-core/src/airflow/api_fastapi/core_api/routes/public/dags.py:
##########
@@ -115,30 +116,54 @@ def get_dags(
session: SessionDep,
) -> DAGCollectionResponse:
"""Get all DAGs."""
- dag_runs_select = None
+ query = select(DagModel)
- if dag_run_state.value or dag_run_start_date_range.is_active() or
dag_run_end_date_range.is_active():
- dag_runs_select, _ = paginated_select(
- statement=select(DagRun),
+ max_run_id_query = ( # ordering by id will not always be "latest run",
but it's a simplifying assumption
+ select(DagRun.dag_id, func.max(DagRun.id).label("max_dag_run_id"))
+ .where(DagRun.start_date.is_not(null()))
+ .group_by(DagRun.dag_id)
+ .subquery(name="mrq")
Review Comment:
The subquery name doesn't seem to be used. Also, just from a DB debugging
perspective (inspecting the output SQL statement), `mrq` without context
doesn't mean anything.
I think we should remove it.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]