This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch v3-0-test
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/v3-0-test by this push:
     new a7c560546fd [v3-0-test] Significantly speed up Pytest bootstrapping on 
MacOS in Breeze (#51223) (#51234)
a7c560546fd is described below

commit a7c560546fd4d14dd64f42eb4fe4f265d0b01faa
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri May 30 12:26:10 2025 +0200

    [v3-0-test] Significantly speed up Pytest bootstrapping on MacOS in Breeze 
(#51223) (#51234)
    
    Bootstrapping of pytest - especially on MacOS in Breeze - could take
    a long time, and it turned out this was because rglob was used both
    to check whether any of the pyproject.toml/provider.yaml files had
    changed and to look for "deprecation ignores". Even just rglobbing
    the providers folder takes a significant amount of time with Docker
    on MacOS because of the slow filesystem.
    
    This has been replaced now with:
    
    * for pyproject.toml/provider.yaml - we use the main airflow
      pyproject.toml to know exactly which pyproject.toml/provider.yaml
      to look for (we have them in workspace definition)
    
    * for deprecations_ignore.yml files - we hardcode the short list of
      the ignore files we have.
    (cherry picked from commit 86b0c82f9b35b186b9c1ab08ac1c2ec6690bc230)
    
    Co-authored-by: Jarek Potiuk <[email protected]>
---
 dev/breeze/src/airflow_breeze/global_constants.py | 32 ++++++++++++++----
 dev/breeze/src/airflow_breeze/utils/path_utils.py |  1 +
 devel-common/src/tests_common/pytest_plugin.py    | 40 ++++++++++++++++++++---
 3 files changed, 62 insertions(+), 11 deletions(-)

diff --git a/dev/breeze/src/airflow_breeze/global_constants.py 
b/dev/breeze/src/airflow_breeze/global_constants.py
index a3f36c52267..41de655a7cb 100644
--- a/dev/breeze/src/airflow_breeze/global_constants.py
+++ b/dev/breeze/src/airflow_breeze/global_constants.py
@@ -20,17 +20,18 @@ Global constants that are used by all other Breeze 
components.
 
 from __future__ import annotations
 
-import itertools
 import json
 import platform
 import subprocess
+from collections.abc import Generator
 from enum import Enum
+from pathlib import Path
 
 from airflow_breeze.utils.functools_cache import clearable_cache
 from airflow_breeze.utils.host_info_utils import Architecture
 from airflow_breeze.utils.path_utils import (
     AIRFLOW_CORE_SOURCES_PATH,
-    AIRFLOW_PROVIDERS_ROOT_PATH,
+    AIRFLOW_PYPROJECT_TOML_FILE_PATH,
     AIRFLOW_ROOT_PATH,
 )
 
@@ -554,9 +555,6 @@ def get_airflow_extras():
 
 
 # Initialize integrations
-ALL_PYPROJECT_TOML_FILES = AIRFLOW_ROOT_PATH.rglob("pyproject.toml")
-ALL_PROVIDER_YAML_FILES = AIRFLOW_PROVIDERS_ROOT_PATH.rglob("provider.yaml")
-ALL_PROVIDER_PYPROJECT_TOML_FILES = 
AIRFLOW_PROVIDERS_ROOT_PATH.rglob("provider.yaml")
 PROVIDER_RUNTIME_DATA_SCHEMA_PATH = AIRFLOW_CORE_SOURCES_PATH / "airflow" / 
"provider_info.schema.json"
 AIRFLOW_GENERATED_PROVIDER_DEPENDENCIES_PATH = AIRFLOW_ROOT_PATH / "generated" 
/ "provider_dependencies.json"
 AIRFLOW_GENERATED_PROVIDER_DEPENDENCIES_HASH_PATH = (
@@ -567,12 +565,34 @@ UPDATE_PROVIDER_DEPENDENCIES_SCRIPT = (
     AIRFLOW_ROOT_PATH / "scripts" / "ci" / "pre_commit" / 
"update_providers_dependencies.py"
 )
 
+ALL_PYPROJECT_TOML_FILES = []
+
+
+def get_all_provider_pyproject_toml_provider_yaml_files() -> Generator[Path, 
None, None]:
+    pyproject_toml_content = 
AIRFLOW_PYPROJECT_TOML_FILE_PATH.read_text().splitlines()
+    in_workspace = False
+    for line in pyproject_toml_content:
+        trimmed_line = line.strip()
+        if not in_workspace and trimmed_line.startswith("[tool.uv.workspace]"):
+            in_workspace = True
+        elif in_workspace:
+            if trimmed_line.startswith("#"):
+                continue
+            if trimmed_line.startswith('"'):
+                path = trimmed_line.split('"')[1]
+                ALL_PYPROJECT_TOML_FILES.append(AIRFLOW_ROOT_PATH / path / 
"pyproject.toml")
+                if trimmed_line.startswith('"providers/'):
+                    yield AIRFLOW_ROOT_PATH / path / "pyproject.toml"
+                    yield AIRFLOW_ROOT_PATH / path / "provider.yaml"
+            elif trimmed_line.startswith("]"):
+                break
+
 
 def _calculate_provider_deps_hash():
     import hashlib
 
     hasher = hashlib.sha256()
-    for file in sorted(itertools.chain(ALL_PROVIDER_PYPROJECT_TOML_FILES, 
ALL_PROVIDER_YAML_FILES)):
+    for file in sorted(get_all_provider_pyproject_toml_provider_yaml_files()):
         hasher.update(file.read_bytes())
     return hasher.hexdigest()
 
diff --git a/dev/breeze/src/airflow_breeze/utils/path_utils.py 
b/dev/breeze/src/airflow_breeze/utils/path_utils.py
index 72440e56994..47a9cbd95e1 100644
--- a/dev/breeze/src/airflow_breeze/utils/path_utils.py
+++ b/dev/breeze/src/airflow_breeze/utils/path_utils.py
@@ -279,6 +279,7 @@ def find_airflow_root_path_to_operate_on() -> Path:
 
 
 AIRFLOW_ROOT_PATH = find_airflow_root_path_to_operate_on().resolve()
+AIRFLOW_PYPROJECT_TOML_FILE_PATH = AIRFLOW_ROOT_PATH / "pyproject.toml"
 AIRFLOW_CORE_ROOT_PATH = AIRFLOW_ROOT_PATH / "airflow-core"
 AIRFLOW_CORE_SOURCES_PATH = AIRFLOW_CORE_ROOT_PATH / "src"
 AIRFLOW_WWW_DIR = AIRFLOW_CORE_SOURCES_PATH / "airflow" / "www"
diff --git a/devel-common/src/tests_common/pytest_plugin.py 
b/devel-common/src/tests_common/pytest_plugin.py
index bd429defc7e..d445243cc4f 100644
--- a/devel-common/src/tests_common/pytest_plugin.py
+++ b/devel-common/src/tests_common/pytest_plugin.py
@@ -146,6 +146,7 @@ os.environ["_IN_UNIT_TESTS"] = "true"
 
 _airflow_sources = os.getenv("AIRFLOW_SOURCES", None)
 AIRFLOW_ROOT_PATH = (Path(_airflow_sources) if _airflow_sources else 
Path(__file__).parents[3]).resolve()
+AIRFLOW_PYPROJECT_TOML_FILE_PATH = AIRFLOW_ROOT_PATH / "pyproject.toml"
 AIRFLOW_CORE_SOURCES_PATH = AIRFLOW_ROOT_PATH / "airflow-core" / "src"
 AIRFLOW_CORE_TESTS_PATH = AIRFLOW_ROOT_PATH / "airflow-core" / "tests"
 AIRFLOW_PROVIDERS_ROOT_PATH = AIRFLOW_ROOT_PATH / "providers"
@@ -156,17 +157,37 @@ AIRFLOW_GENERATED_PROVIDER_DEPENDENCIES_HASH_PATH = (
 UPDATE_PROVIDER_DEPENDENCIES_SCRIPT = (
     AIRFLOW_ROOT_PATH / "scripts" / "ci" / "pre_commit" / 
"update_providers_dependencies.py"
 )
-ALL_PROVIDER_YAML_FILES = AIRFLOW_PROVIDERS_ROOT_PATH.rglob("provider.yaml")
-ALL_PROVIDER_PYPROJECT_TOML_FILES = 
AIRFLOW_PROVIDERS_ROOT_PATH.rglob("provider.yaml")
-
 
 # Deliberately copied from breeze - we want to keep it in sync but we do not 
want to import code from
 # Breeze here as we want to do it quickly
+ALL_PYPROJECT_TOML_FILES = []
+
+
+def get_all_provider_pyproject_toml_provider_yaml_files() -> Generator[Path, 
None, None]:
+    pyproject_toml_content = 
AIRFLOW_PYPROJECT_TOML_FILE_PATH.read_text().splitlines()
+    in_workspace = False
+    for line in pyproject_toml_content:
+        trimmed_line = line.strip()
+        if not in_workspace and trimmed_line.startswith("[tool.uv.workspace]"):
+            in_workspace = True
+        elif in_workspace:
+            if trimmed_line.startswith("#"):
+                continue
+            if trimmed_line.startswith('"'):
+                path = trimmed_line.split('"')[1]
+                ALL_PYPROJECT_TOML_FILES.append(AIRFLOW_ROOT_PATH / path / 
"pyproject.toml")
+                if trimmed_line.startswith('"providers/'):
+                    yield AIRFLOW_ROOT_PATH / path / "pyproject.toml"
+                    yield AIRFLOW_ROOT_PATH / path / "provider.yaml"
+            elif trimmed_line.startswith("]"):
+                break
+
+
 def _calculate_provider_deps_hash():
     import hashlib
 
     hasher = hashlib.sha256()
-    for file in sorted(itertools.chain(ALL_PROVIDER_PYPROJECT_TOML_FILES, 
ALL_PROVIDER_YAML_FILES)):
+    for file in sorted(get_all_provider_pyproject_toml_provider_yaml_files()):
         hasher.update(file.read_bytes())
     return hasher.hexdigest()
 
@@ -415,8 +436,17 @@ def initialize_airflow_tests(request):
             sys.exit(1)
 
 
+# for performance reasons, we do not want to rglob deprecation ignore files
+# because in MacOS in docker it takes a lot of time to rglob them
+# so we opt to hardcode the paths here
+DEPRECATIONS_IGNORE_FILES = [
+    AIRFLOW_CORE_TESTS_PATH / "deprecations_ignore.yml",
+    AIRFLOW_ROOT_PATH / "providers" / "google" / "tests" / 
"deprecations_ignore.yml",
+]
+
+
 def _find_all_deprecation_ignore_files() -> list[str]:
-    all_deprecation_ignore_files = 
AIRFLOW_ROOT_PATH.rglob("deprecations_ignore.yml")
+    all_deprecation_ignore_files = DEPRECATIONS_IGNORE_FILES.copy()
     return list(path.as_posix() for path in all_deprecation_ignore_files)
 
 

Reply via email to