This is an automated email from the ASF dual-hosted git repository. ephraimanierobi pushed a commit to branch v3-1-test in repository https://gitbox.apache.org/repos/asf/airflow.git
commit d99e3d440f92cdc39d62e19c892221a6b2b24703 Author: Pierre Jeambrun <[email protected]> AuthorDate: Thu Dec 4 18:18:17 2025 +0100 Backport (#59052) --- airflow-core/docs/howto/index.rst | 1 + airflow-core/docs/howto/performance.rst | 55 +++++++++++++++ .../src/airflow/api_fastapi/common/parameters.py | 7 +- .../core_api/routes/public/test_dags.py | 80 ++++++++++++++-------- .../core_api/routes/public/test_event_logs.py | 53 ++++++++++---- .../core_api/routes/public/test_hitl.py | 4 ++ .../core_api/routes/public/test_task_instances.py | 4 +- 7 files changed, 153 insertions(+), 51 deletions(-) diff --git a/airflow-core/docs/howto/index.rst b/airflow-core/docs/howto/index.rst index 9abc9caa76e..0d4f79e74e5 100644 --- a/airflow-core/docs/howto/index.rst +++ b/airflow-core/docs/howto/index.rst @@ -56,3 +56,4 @@ configuring an Airflow environment. dynamic-dag-generation docker-compose/index run-with-self-signed-certificate + performance diff --git a/airflow-core/docs/howto/performance.rst b/airflow-core/docs/howto/performance.rst new file mode 100644 index 00000000000..1f7d77d85dc --- /dev/null +++ b/airflow-core/docs/howto/performance.rst @@ -0,0 +1,55 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Performance tuning (API and UI) +=============================== + +This guide collects pragmatic tips that improve Airflow performance for API and UI workloads. + +Custom metadata indexes +----------------------- + +If you observe slowness in some API calls or specific UI views, you should inspect query plans and add indexes yourself +that match your workload. Listing endpoints and UI table views with specific ordering criteria are likely +to benefit from additional indexes if you have a large volume of metadata. + +When to use +^^^^^^^^^^^ + +- Slow API list/detail endpoints caused by frequent scans or lookups on columns like ``start_date``, timestamps (e.g. ``dttm``), or status fields. +- UI pages that load large lists or perform heavy filtering on metadata tables. + +Guidance +^^^^^^^^ + +- Inspect the query planner (e.g., ``EXPLAIN``/``EXPLAIN ANALYZE``) for slow endpoints and identify missing indexes. +- Prefer single or composite indexes that match your most common ordering logic, typically the ``order_by`` + query parameter used in API calls. Composite indexes can cover multi criteria ordering. +- Your optimal indexes depend on how you use the API and UI; there is no one-size-fits-all set we can ship by default. + +Upgrade considerations +^^^^^^^^^^^^^^^^^^^^^^ + +To avoid conflicts with Airflow database upgrades, delete your custom indexes before running an Airflow DB upgrade +and re-apply them after the upgrade succeeds. + +Notes +^^^^^ + +- Review query plans (e.g. via ``EXPLAIN``) to choose effective column sets and ordering for your workload. +- Composite indexes should list columns in selectivity order appropriate to your most common predicates. +- Indexes incur write overhead; add only those that materially improve your read paths. diff --git a/airflow-core/src/airflow/api_fastapi/common/parameters.py b/airflow-core/src/airflow/api_fastapi/common/parameters.py index 668e6721f87..123b6d9841f 100644 --- a/airflow-core/src/airflow/api_fastapi/common/parameters.py +++ b/airflow-core/src/airflow/api_fastapi/common/parameters.py @@ -34,7 +34,7 @@ from typing import ( from fastapi import Depends, HTTPException, Query, status from pendulum.parsing.exceptions import ParserError from pydantic import AfterValidator, BaseModel, NonNegativeInt -from sqlalchemy import Column, and_, case, func, not_, or_, select as sql_select +from sqlalchemy import Column, and_, func, not_, or_, select as sql_select from sqlalchemy.inspection import inspect from airflow._shared.timezones import timezone @@ -248,11 +248,6 @@ class SortParam(BaseParam[list[str]]): if column is None: column = getattr(self.model, lstriped_orderby) - # MySQL does not support `nullslast`, and True/False ordering depends on the - # database implementation. - nullscheck = case((column.isnot(None), 0), else_=1) - - columns.append(nullscheck) if order_by_value.startswith("-"): columns.append(column.desc()) else: diff --git a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dags.py b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dags.py index 78293779a09..cd843b336b6 100644 --- a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dags.py +++ b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dags.py @@ -383,36 +383,6 @@ class TestGetDags(TestDagEndpoint): 2, [DAG1_ID, DAG2_ID], ), - ( - {"order_by": "next_dagrun", "exclude_stale": False}, - 3, - [DAG3_ID, DAG1_ID, DAG2_ID], - ), - ( - {"order_by": "last_run_state", "exclude_stale": False}, - 3, - [DAG1_ID, DAG3_ID, DAG2_ID], - ), - ( - {"order_by": "-last_run_state", "exclude_stale": False}, - 3, - [DAG3_ID, DAG1_ID, DAG2_ID], - ), - ( - {"order_by": "last_run_start_date", "exclude_stale": False}, - 3, - [DAG1_ID, DAG3_ID, DAG2_ID], - ), - ( - {"order_by": "-last_run_start_date", "exclude_stale": False}, - 3, - [DAG3_ID, DAG1_ID, DAG2_ID], - ), - ( - {"order_by": ["next_dagrun", "-dag_display_name"], "exclude_stale": False}, - 3, - [DAG3_ID, DAG2_ID, DAG1_ID], - ), # Search ({"dag_id_pattern": "1"}, 1, [DAG1_ID]), ({"dag_display_name_pattern": "test_dag2"}, 1, [DAG2_ID]), @@ -451,6 +421,56 @@ class TestGetDags(TestDagEndpoint): assert actual_ids == expected_ids + # Ordering of nulls values is DB specific. + @pytest.mark.backend("sqlite") + @pytest.mark.parametrize( + ("query_params", "expected_total_entries", "expected_ids"), + [ + ( + {"order_by": "next_dagrun", "exclude_stale": False}, + 3, + [DAG1_ID, DAG2_ID, DAG3_ID], + ), + ( + {"order_by": "last_run_state", "exclude_stale": False}, + 3, + [DAG2_ID, DAG1_ID, DAG3_ID], + ), + ( + {"order_by": "-last_run_state", "exclude_stale": False}, + 3, + [DAG3_ID, DAG1_ID, DAG2_ID], + ), + ( + {"order_by": "last_run_start_date", "exclude_stale": False}, + 3, + [DAG2_ID, DAG1_ID, DAG3_ID], + ), + ( + {"order_by": "-last_run_start_date", "exclude_stale": False}, + 3, + [DAG3_ID, DAG1_ID, DAG2_ID], + ), + ( + {"order_by": ["next_dagrun", "-dag_display_name"], "exclude_stale": False}, + 3, + [DAG2_ID, DAG1_ID, DAG3_ID], + ), + ], + ) + def test_get_dags_with_nullable_fields( + self, test_client, query_params, expected_total_entries, expected_ids, session + ): + with assert_queries_count(4): + response = test_client.get("/dags", params=query_params) + assert response.status_code == 200 + body = response.json() + + assert body["total_entries"] == expected_total_entries + actual_ids = [dag["dag_id"] for dag in body["dags"]] + + assert actual_ids == expected_ids + @mock.patch("airflow.api_fastapi.auth.managers.base_auth_manager.BaseAuthManager.get_authorized_dag_ids") def test_get_dags_should_call_authorized_dag_ids(self, mock_get_authorized_dag_ids, test_client): mock_get_authorized_dag_ids.return_value = {DAG1_ID, DAG2_ID} diff --git a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_event_logs.py b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_event_logs.py index e46a355c0f7..dd87e8d7fac 100644 --- a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_event_logs.py +++ b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_event_logs.py @@ -279,20 +279,6 @@ class TestGetEventLogs(TestEventLogsEndpoint): 4, [EVENT_NORMAL, EVENT_WITH_OWNER, TASK_INSTANCE_EVENT, EVENT_WITH_OWNER_AND_TASK_INSTANCE], ), - # order_by - ( - {"order_by": "-id"}, - 200, - 4, - [EVENT_WITH_OWNER_AND_TASK_INSTANCE, TASK_INSTANCE_EVENT, EVENT_WITH_OWNER, EVENT_NORMAL], - ), - ( - {"order_by": "logical_date"}, - 200, - 4, - [TASK_INSTANCE_EVENT, EVENT_WITH_OWNER_AND_TASK_INSTANCE, EVENT_NORMAL, EVENT_WITH_OWNER], - ), - # combination of query parameters ( {"offset": 1, "excluded_events": ["non_existed_event"], "order_by": "event"}, 200, @@ -327,6 +313,45 @@ class TestGetEventLogs(TestEventLogsEndpoint): for event_log, expected_event in zip(resp_json["event_logs"], expected_events): assert event_log["event"] == expected_event + # Ordering of nulls values is DB specific. + @pytest.mark.backend("sqlite") + @pytest.mark.parametrize( + ("query_params", "expected_status_code", "expected_total_entries", "expected_events"), + [ + ( + {"order_by": "-id"}, + 200, + 4, + [EVENT_WITH_OWNER_AND_TASK_INSTANCE, TASK_INSTANCE_EVENT, EVENT_WITH_OWNER, EVENT_NORMAL], + ), + ( + {"order_by": "logical_date"}, + 200, + 4, + [EVENT_NORMAL, EVENT_WITH_OWNER, TASK_INSTANCE_EVENT, EVENT_WITH_OWNER_AND_TASK_INSTANCE], + ), + ( + {"order_by": "-logical_date"}, + 200, + 4, + [EVENT_WITH_OWNER_AND_TASK_INSTANCE, TASK_INSTANCE_EVENT, EVENT_WITH_OWNER, EVENT_NORMAL], + ), + ], + ) + def test_get_event_logs_order_by( + self, test_client, query_params, expected_status_code, expected_total_entries, expected_events + ): + with assert_queries_count(2): + response = test_client.get("/eventLogs", params=query_params) + assert response.status_code == expected_status_code + if expected_status_code != 200: + return + + resp_json = response.json() + assert resp_json["total_entries"] == expected_total_entries + for event_log, expected_event in zip(resp_json["event_logs"], expected_events): + assert event_log["event"] == expected_event + def test_should_raises_401_unauthenticated(self, unauthenticated_test_client): response = unauthenticated_test_client.get("/eventLogs") assert response.status_code == 401 diff --git a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_hitl.py b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_hitl.py index 4c10feee7d6..e025ea50969 100644 --- a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_hitl.py +++ b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_hitl.py @@ -695,6 +695,10 @@ class TestGetHITLDetailsEndpoint: reverse=reverse, ) + # Remove entries with None, because None orders depends on the DB implementation + hitl_details = [d for d in hitl_details if get_key_lambda(d) is not None] + sorted_hitl_details = [d for d in sorted_hitl_details if get_key_lambda(d) is not None] + assert hitl_details == sorted_hitl_details def test_should_respond_200_without_response(self, test_client: TestClient) -> None: diff --git a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py index 6f743cd7d4b..a353b110791 100644 --- a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py +++ b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py @@ -813,10 +813,12 @@ class TestGetMappedTaskInstances: assert len(body["task_instances"]) == params["limit"] assert expected_map_indexes == [ti["map_index"] for ti in body["task_instances"]] + # Ordering of nulls values is DB specific. + @pytest.mark.backend("sqlite") @pytest.mark.parametrize( "params, expected_map_indexes", [ - ({"order_by": "rendered_map_index", "limit": 108}, [0] + list(range(1, 108))), # Asc + ({"order_by": "rendered_map_index", "limit": 108}, list(range(1, 109))), # Asc ({"order_by": "-rendered_map_index", "limit": 100}, [0] + list(range(11, 110)[::-1])), # Desc ], )
