This is an automated email from the ASF dual-hosted git repository.

xddeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/master by this push:
     new bb00f16  Refine the DB query logics in www.views.task_stats() (#12707)
bb00f16 is described below

commit bb00f164da6c84f078e66582217832aeedb2a966
Author: Xiaodong DENG <[email protected]>
AuthorDate: Sun Nov 29 22:22:38 2020 +0100

    Refine the DB query logics in www.views.task_stats() (#12707)
    
    * Refine the DB query logics in www.views.task_stats()
    
    - Given that filter_dag_ids is either allowed_dag_ids or the intersection of 
allowed_dag_ids and selected_dag_ids,
      filter_dag_ids should ALWAYS be applied in the SQL query, regardless of 
whether selected_dag_ids is None.
    
      Currently, if selected_dag_ids is None, the query actually fetches the 
full result (which is only filtered at the end).
      This means more (unnecessary) data travels between Airflow and the DB.
    
    - When we join tables A and B on A.id == B.id (the default is an INNER join), if 
we ensure that ALL A.id values are in a specific list,
      then ALL ids in the result table are implicitly guaranteed to be in this 
specific list as well.
      This is why the two redundant .filter() chunks are removed.
    
    A minor performance improvement should be expected.
    This change also makes the code cleaner.
---
 airflow/www/views.py | 18 ++----------------
 1 file changed, 2 insertions(+), 16 deletions(-)

diff --git a/airflow/www/views.py b/airflow/www/views.py
index 1b38174..5fd9009 100644
--- a/airflow/www/views.py
+++ b/airflow/www/views.py
@@ -659,10 +659,7 @@ class Airflow(AirflowBaseView):  # noqa: D101  pylint: 
disable=too-many-public-m
         # pylint: enable=comparison-with-callable
 
         # pylint: disable=no-member
-        if selected_dag_ids:
-            running_dag_run_query_result = running_dag_run_query_result.filter(
-                DagRun.dag_id.in_(filter_dag_ids)
-            )
+        running_dag_run_query_result = 
running_dag_run_query_result.filter(DagRun.dag_id.in_(filter_dag_ids))
         # pylint: enable=no-member
 
         running_dag_run_query_result = 
running_dag_run_query_result.subquery('running_dag_run')
@@ -678,10 +675,6 @@ class Airflow(AirflowBaseView):  # noqa: D101  pylint: 
disable=too-many-public-m
                 running_dag_run_query_result.c.execution_date == 
TaskInstance.execution_date,
             ),
         )
-        if selected_dag_ids:
-            running_task_instance_query_result = 
running_task_instance_query_result.filter(
-                TaskInstance.dag_id.in_(filter_dag_ids)
-            )
         # pylint: enable=no-member
 
         if conf.getboolean('webserver', 
'SHOW_RECENT_STATS_FOR_COMPLETED_RUNS', fallback=True):
@@ -694,8 +687,7 @@ class Airflow(AirflowBaseView):  # noqa: D101  pylint: 
disable=too-many-public-m
             )
             # pylint: enable=comparison-with-callable
             # pylint: disable=no-member
-            if selected_dag_ids:
-                last_dag_run = 
last_dag_run.filter(DagRun.dag_id.in_(filter_dag_ids))
+            last_dag_run = 
last_dag_run.filter(DagRun.dag_id.in_(filter_dag_ids))
             last_dag_run = last_dag_run.subquery('last_dag_run')
             # pylint: enable=no-member
 
@@ -710,12 +702,6 @@ class Airflow(AirflowBaseView):  # noqa: D101  pylint: 
disable=too-many-public-m
                     last_dag_run.c.execution_date == 
TaskInstance.execution_date,
                 ),
             )
-            # pylint: disable=no-member
-            if selected_dag_ids:
-                last_task_instance_query_result = 
last_task_instance_query_result.filter(
-                    TaskInstance.dag_id.in_(filter_dag_ids)
-                )
-            # pylint: enable=no-member
 
             final_task_instance_query_result = union_all(
                 last_task_instance_query_result, 
running_task_instance_query_result

Reply via email to