This is an automated email from the ASF dual-hosted git repository.
xddeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/master by this push:
new bb00f16 Refine the DB query logics in www.views.task_stats() (#12707)
bb00f16 is described below
commit bb00f164da6c84f078e66582217832aeedb2a966
Author: Xiaodong DENG <[email protected]>
AuthorDate: Sun Nov 29 22:22:38 2020 +0100
Refine the DB query logics in www.views.task_stats() (#12707)
* Refine the DB query logics in www.views.task_stats()
- Given that filter_dag_ids is either allowed_dag_ids or the intersection of
allowed_dag_ids and selected_dag_ids,
filter_dag_ids should ALWAYS be applied in the SQL query,
regardless of whether selected_dag_ids is None.
Currently, if selected_dag_ids is None, the query fetches the
full result, which is only filtered at the end.
This means more data than necessary travels between Airflow and the DB.
- When we join tables A and B on A.id == B.id (the default is an INNER join)
and we have already ensured that every A.id is in a specific list,
then every id in the joined result is implicitly guaranteed to be in that
list as well.
This is why the two redundant .filter() chunks are removed.
A minor performance improvement should be expected.
This change also makes the code cleaner.
---
airflow/www/views.py | 18 ++----------------
1 file changed, 2 insertions(+), 16 deletions(-)
diff --git a/airflow/www/views.py b/airflow/www/views.py
index 1b38174..5fd9009 100644
--- a/airflow/www/views.py
+++ b/airflow/www/views.py
@@ -659,10 +659,7 @@ class Airflow(AirflowBaseView): # noqa: D101 pylint:
disable=too-many-public-m
# pylint: enable=comparison-with-callable
# pylint: disable=no-member
- if selected_dag_ids:
- running_dag_run_query_result = running_dag_run_query_result.filter(
- DagRun.dag_id.in_(filter_dag_ids)
- )
+ running_dag_run_query_result =
running_dag_run_query_result.filter(DagRun.dag_id.in_(filter_dag_ids))
# pylint: enable=no-member
running_dag_run_query_result =
running_dag_run_query_result.subquery('running_dag_run')
@@ -678,10 +675,6 @@ class Airflow(AirflowBaseView): # noqa: D101 pylint:
disable=too-many-public-m
running_dag_run_query_result.c.execution_date ==
TaskInstance.execution_date,
),
)
- if selected_dag_ids:
- running_task_instance_query_result =
running_task_instance_query_result.filter(
- TaskInstance.dag_id.in_(filter_dag_ids)
- )
# pylint: enable=no-member
if conf.getboolean('webserver',
'SHOW_RECENT_STATS_FOR_COMPLETED_RUNS', fallback=True):
@@ -694,8 +687,7 @@ class Airflow(AirflowBaseView): # noqa: D101 pylint:
disable=too-many-public-m
)
# pylint: enable=comparison-with-callable
# pylint: disable=no-member
- if selected_dag_ids:
- last_dag_run =
last_dag_run.filter(DagRun.dag_id.in_(filter_dag_ids))
+ last_dag_run =
last_dag_run.filter(DagRun.dag_id.in_(filter_dag_ids))
last_dag_run = last_dag_run.subquery('last_dag_run')
# pylint: enable=no-member
@@ -710,12 +702,6 @@ class Airflow(AirflowBaseView): # noqa: D101 pylint:
disable=too-many-public-m
last_dag_run.c.execution_date ==
TaskInstance.execution_date,
),
)
- # pylint: disable=no-member
- if selected_dag_ids:
- last_task_instance_query_result =
last_task_instance_query_result.filter(
- TaskInstance.dag_id.in_(filter_dag_ids)
- )
- # pylint: enable=no-member
final_task_instance_query_result = union_all(
last_task_instance_query_result,
running_task_instance_query_result