This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch v3-0-test
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/v3-0-test by this push:
new a7c560546fd [v3-0-test] Significantly speed up Pytest bootstrapping on
MacOS in Breeze (#51223) (#51234)
a7c560546fd is described below
commit a7c560546fd4d14dd64f42eb4fe4f265d0b01faa
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri May 30 12:26:10 2025 +0200
[v3-0-test] Significantly speed up Pytest bootstrapping on MacOS in Breeze
(#51223) (#51234)
Bootstrapping of pytest - especially on MacOS in Breeze - could take
a long time, and it turns out this was because rglob was used both to
check whether any of the pyproject.toml/provider.yaml files changed
and to look for "deprecation ignores". Both were using rglob, and it
turned out that even just rglobbing the providers folder takes a significant
amount of time with MacOS docker - because of the slow filesystem.
This has now been replaced with:
* for pyproject.toml/provider.yaml - we use the main airflow
pyproject.toml to know exactly which pyproject.toml/provider.yaml
files to look for (we have them in the workspace definition)
* for deprecations_ignores - we hardcode the short list of the ignore
files we have.
(cherry picked from commit 86b0c82f9b35b186b9c1ab08ac1c2ec6690bc230)
Co-authored-by: Jarek Potiuk <[email protected]>
---
dev/breeze/src/airflow_breeze/global_constants.py | 32 ++++++++++++++----
dev/breeze/src/airflow_breeze/utils/path_utils.py | 1 +
devel-common/src/tests_common/pytest_plugin.py | 40 ++++++++++++++++++++---
3 files changed, 62 insertions(+), 11 deletions(-)
diff --git a/dev/breeze/src/airflow_breeze/global_constants.py
b/dev/breeze/src/airflow_breeze/global_constants.py
index a3f36c52267..41de655a7cb 100644
--- a/dev/breeze/src/airflow_breeze/global_constants.py
+++ b/dev/breeze/src/airflow_breeze/global_constants.py
@@ -20,17 +20,18 @@ Global constants that are used by all other Breeze
components.
from __future__ import annotations
-import itertools
import json
import platform
import subprocess
+from collections.abc import Generator
from enum import Enum
+from pathlib import Path
from airflow_breeze.utils.functools_cache import clearable_cache
from airflow_breeze.utils.host_info_utils import Architecture
from airflow_breeze.utils.path_utils import (
AIRFLOW_CORE_SOURCES_PATH,
- AIRFLOW_PROVIDERS_ROOT_PATH,
+ AIRFLOW_PYPROJECT_TOML_FILE_PATH,
AIRFLOW_ROOT_PATH,
)
@@ -554,9 +555,6 @@ def get_airflow_extras():
# Initialize integrations
-ALL_PYPROJECT_TOML_FILES = AIRFLOW_ROOT_PATH.rglob("pyproject.toml")
-ALL_PROVIDER_YAML_FILES = AIRFLOW_PROVIDERS_ROOT_PATH.rglob("provider.yaml")
-ALL_PROVIDER_PYPROJECT_TOML_FILES =
AIRFLOW_PROVIDERS_ROOT_PATH.rglob("provider.yaml")
PROVIDER_RUNTIME_DATA_SCHEMA_PATH = AIRFLOW_CORE_SOURCES_PATH / "airflow" /
"provider_info.schema.json"
AIRFLOW_GENERATED_PROVIDER_DEPENDENCIES_PATH = AIRFLOW_ROOT_PATH / "generated"
/ "provider_dependencies.json"
AIRFLOW_GENERATED_PROVIDER_DEPENDENCIES_HASH_PATH = (
@@ -567,12 +565,34 @@ UPDATE_PROVIDER_DEPENDENCIES_SCRIPT = (
AIRFLOW_ROOT_PATH / "scripts" / "ci" / "pre_commit" /
"update_providers_dependencies.py"
)
+ALL_PYPROJECT_TOML_FILES = []
+
+
+def get_all_provider_pyproject_toml_provider_yaml_files() -> Generator[Path,
None, None]:
+ pyproject_toml_content =
AIRFLOW_PYPROJECT_TOML_FILE_PATH.read_text().splitlines()
+ in_workspace = False
+ for line in pyproject_toml_content:
+ trimmed_line = line.strip()
+ if not in_workspace and trimmed_line.startswith("[tool.uv.workspace]"):
+ in_workspace = True
+ elif in_workspace:
+ if trimmed_line.startswith("#"):
+ continue
+ if trimmed_line.startswith('"'):
+ path = trimmed_line.split('"')[1]
+ ALL_PYPROJECT_TOML_FILES.append(AIRFLOW_ROOT_PATH / path /
"pyproject.toml")
+ if trimmed_line.startswith('"providers/'):
+ yield AIRFLOW_ROOT_PATH / path / "pyproject.toml"
+ yield AIRFLOW_ROOT_PATH / path / "provider.yaml"
+ elif trimmed_line.startswith("]"):
+ break
+
def _calculate_provider_deps_hash():
import hashlib
hasher = hashlib.sha256()
- for file in sorted(itertools.chain(ALL_PROVIDER_PYPROJECT_TOML_FILES,
ALL_PROVIDER_YAML_FILES)):
+ for file in sorted(get_all_provider_pyproject_toml_provider_yaml_files()):
hasher.update(file.read_bytes())
return hasher.hexdigest()
diff --git a/dev/breeze/src/airflow_breeze/utils/path_utils.py
b/dev/breeze/src/airflow_breeze/utils/path_utils.py
index 72440e56994..47a9cbd95e1 100644
--- a/dev/breeze/src/airflow_breeze/utils/path_utils.py
+++ b/dev/breeze/src/airflow_breeze/utils/path_utils.py
@@ -279,6 +279,7 @@ def find_airflow_root_path_to_operate_on() -> Path:
AIRFLOW_ROOT_PATH = find_airflow_root_path_to_operate_on().resolve()
+AIRFLOW_PYPROJECT_TOML_FILE_PATH = AIRFLOW_ROOT_PATH / "pyproject.toml"
AIRFLOW_CORE_ROOT_PATH = AIRFLOW_ROOT_PATH / "airflow-core"
AIRFLOW_CORE_SOURCES_PATH = AIRFLOW_CORE_ROOT_PATH / "src"
AIRFLOW_WWW_DIR = AIRFLOW_CORE_SOURCES_PATH / "airflow" / "www"
diff --git a/devel-common/src/tests_common/pytest_plugin.py
b/devel-common/src/tests_common/pytest_plugin.py
index bd429defc7e..d445243cc4f 100644
--- a/devel-common/src/tests_common/pytest_plugin.py
+++ b/devel-common/src/tests_common/pytest_plugin.py
@@ -146,6 +146,7 @@ os.environ["_IN_UNIT_TESTS"] = "true"
_airflow_sources = os.getenv("AIRFLOW_SOURCES", None)
AIRFLOW_ROOT_PATH = (Path(_airflow_sources) if _airflow_sources else
Path(__file__).parents[3]).resolve()
+AIRFLOW_PYPROJECT_TOML_FILE_PATH = AIRFLOW_ROOT_PATH / "pyproject.toml"
AIRFLOW_CORE_SOURCES_PATH = AIRFLOW_ROOT_PATH / "airflow-core" / "src"
AIRFLOW_CORE_TESTS_PATH = AIRFLOW_ROOT_PATH / "airflow-core" / "tests"
AIRFLOW_PROVIDERS_ROOT_PATH = AIRFLOW_ROOT_PATH / "providers"
@@ -156,17 +157,37 @@ AIRFLOW_GENERATED_PROVIDER_DEPENDENCIES_HASH_PATH = (
UPDATE_PROVIDER_DEPENDENCIES_SCRIPT = (
AIRFLOW_ROOT_PATH / "scripts" / "ci" / "pre_commit" /
"update_providers_dependencies.py"
)
-ALL_PROVIDER_YAML_FILES = AIRFLOW_PROVIDERS_ROOT_PATH.rglob("provider.yaml")
-ALL_PROVIDER_PYPROJECT_TOML_FILES =
AIRFLOW_PROVIDERS_ROOT_PATH.rglob("provider.yaml")
-
# Deliberately copied from breeze - we want to keep it in sync but we do not
want to import code from
# Breeze here as we want to do it quickly
+ALL_PYPROJECT_TOML_FILES = []
+
+
+def get_all_provider_pyproject_toml_provider_yaml_files() -> Generator[Path,
None, None]:
+ pyproject_toml_content =
AIRFLOW_PYPROJECT_TOML_FILE_PATH.read_text().splitlines()
+ in_workspace = False
+ for line in pyproject_toml_content:
+ trimmed_line = line.strip()
+ if not in_workspace and trimmed_line.startswith("[tool.uv.workspace]"):
+ in_workspace = True
+ elif in_workspace:
+ if trimmed_line.startswith("#"):
+ continue
+ if trimmed_line.startswith('"'):
+ path = trimmed_line.split('"')[1]
+ ALL_PYPROJECT_TOML_FILES.append(AIRFLOW_ROOT_PATH / path /
"pyproject.toml")
+ if trimmed_line.startswith('"providers/'):
+ yield AIRFLOW_ROOT_PATH / path / "pyproject.toml"
+ yield AIRFLOW_ROOT_PATH / path / "provider.yaml"
+ elif trimmed_line.startswith("]"):
+ break
+
+
def _calculate_provider_deps_hash():
import hashlib
hasher = hashlib.sha256()
- for file in sorted(itertools.chain(ALL_PROVIDER_PYPROJECT_TOML_FILES,
ALL_PROVIDER_YAML_FILES)):
+ for file in sorted(get_all_provider_pyproject_toml_provider_yaml_files()):
hasher.update(file.read_bytes())
return hasher.hexdigest()
@@ -415,8 +436,17 @@ def initialize_airflow_tests(request):
sys.exit(1)
+# for performance reasons, we do not want to rglob deprecation ignore files
+# because in MacOS in docker it takes a lot of time to rglob them
+# so we opt to hardcode the paths here
+DEPRECATIONS_IGNORE_FILES = [
+ AIRFLOW_CORE_TESTS_PATH / "deprecations_ignore.yml",
+ AIRFLOW_ROOT_PATH / "providers" / "google" / "tests" /
"deprecations_ignore.yml",
+]
+
+
def _find_all_deprecation_ignore_files() -> list[str]:
- all_deprecation_ignore_files =
AIRFLOW_ROOT_PATH.rglob("deprecations_ignore.yml")
+ all_deprecation_ignore_files = DEPRECATIONS_IGNORE_FILES.copy()
return list(path.as_posix() for path in all_deprecation_ignore_files)