This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch v2-8-test
in repository https://gitbox.apache.org/repos/asf/airflow.git

commit 594f2aabad973372ed0284e680b50e3e397dccd9
Author: Jarek Potiuk <[email protected]>
AuthorDate: Sat Feb 10 17:08:35 2024 +0100

    Optimize CI builds for unimportant pyproject.toml changes (#37305)
    
    When dependencies change in pyproject.toml, we should run build
    with `upgrade-to-newer-dependencies`, however we should not run
    it when dependencies in pyproject.toml do not change.
    
    That saves about 30 minutes of elapsed time of the build and heavily
    limits the number of tests executed. It takes about 30 minutes now
    to build the image that has "upgrade-to-newer-dependencies", we only
    usually run default Python image in such case and we do not run many
    tests that are not needed (for example K8S tests).
    
    This PR optimizes out the case where non-dependency changes only are
    done in pyproject.toml. This happens for example when you only change
    docstrings and remove ruff rules in [[tools.ruff]] section.
    
    We compare the dependencies and optional dependencies coming from
    the change and only when there is a change in those, we set the
    `upgrade-to-newer-dependencies` flag. We also print what changed.
    
    Similarly full-tests-needed is only set when build-system changes
    or when dependencies change, because then we want to make sure new
    dependencies are working on all Python versions.
    
    (cherry picked from commit 6ce5225230136aaf50999af78a6599f759b18493)
---
 .github/workflows/ci.yml                           |   2 +
 dev/breeze/README.md                               |   2 +-
 dev/breeze/pyproject.toml                          |   1 +
 .../src/airflow_breeze/utils/selective_checks.py   | 132 +++++++++++++++++++--
 dev/breeze/tests/test_selective_checks.py          |  84 ++++++++++++-
 5 files changed, 206 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 1a91119028..d636b2eee4 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -315,6 +315,8 @@ jobs:
         run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm 
-rf /workspace/*"
       - uses: actions/checkout@v4
         with:
+          # Need to fetch all history for selective checks tests
+          fetch-depth: 0
           persist-credentials: false
       - uses: actions/setup-python@v4
         with:
diff --git a/dev/breeze/README.md b/dev/breeze/README.md
index de9fa17911..a6e6419c36 100644
--- a/dev/breeze/README.md
+++ b/dev/breeze/README.md
@@ -66,6 +66,6 @@ PLEASE DO NOT MODIFY THE HASH BELOW! IT IS AUTOMATICALLY 
UPDATED BY PRE-COMMIT.
 
 
---------------------------------------------------------------------------------------------------------
 
-Package config hash: 
0a4cc814e27e822622708d862952a5b411a1a4ad8f3bca8fa591f39ed670ab6636de3caf7c1d072896c411c7eef824f887202b8de8729b8622f2af9b84a154b3
+Package config hash: 
e2db123fd25e40b515520fb9f6ed32a601fe85e6a22b9845343bc5c81e5785b472527ed3bf6d07521c512d2222f99877a1ce1911ac504a3a6af7b7866aa674cc
 
 
---------------------------------------------------------------------------------------------------------
diff --git a/dev/breeze/pyproject.toml b/dev/breeze/pyproject.toml
index 214f5e6478..5a43c3b560 100644
--- a/dev/breeze/pyproject.toml
+++ b/dev/breeze/pyproject.toml
@@ -69,6 +69,7 @@ dependencies = [
     "rich>=13.6.0",
     "semver>=3.0.2",
     "tabulate>=0.9.0",
+    "tomli>=2.0.1; python_version < '3.11'",
     "twine>=4.0.2",
 ]
 
diff --git a/dev/breeze/src/airflow_breeze/utils/selective_checks.py 
b/dev/breeze/src/airflow_breeze/utils/selective_checks.py
index ea2667f07c..80d2955df6 100644
--- a/dev/breeze/src/airflow_breeze/utils/selective_checks.py
+++ b/dev/breeze/src/airflow_breeze/utils/selective_checks.py
@@ -17,6 +17,7 @@
 
 from __future__ import annotations
 
+import difflib
 import json
 import os
 import re
@@ -63,6 +64,7 @@ from airflow_breeze.utils.path_utils import (
     TESTS_PROVIDERS_ROOT,
 )
 from airflow_breeze.utils.provider_dependencies import DEPENDENCIES, 
get_related_providers
+from airflow_breeze.utils.run_utils import run_command
 
 FULL_TESTS_NEEDED_LABEL = "full tests needed"
 DEBUG_CI_RESOURCES_LABEL = "debug ci resources"
@@ -86,7 +88,7 @@ class FileGroupForCi(Enum):
     API_TEST_FILES = "api_test_files"
     API_CODEGEN_FILES = "api_codegen_files"
     HELM_FILES = "helm_files"
-    SETUP_FILES = "setup_files"
+    DEPENDENCY_FILES = "dependency_files"
     DOC_FILES = "doc_files"
     WWW_FILES = "www_files"
     SYSTEM_TEST_FILES = "system_tests"
@@ -116,7 +118,6 @@ CI_FILE_GROUP_MATCHES = HashableDict(
             r"^dev/.*\.py$",
             r"^Dockerfile",
             r"^scripts",
-            r"^pyproject.toml",
             r"^generated/provider_dependencies.json$",
         ],
         FileGroupForCi.PYTHON_PRODUCTION_FILES: [
@@ -141,8 +142,7 @@ CI_FILE_GROUP_MATCHES = HashableDict(
             r"^tests/kubernetes",
             r"^helm_tests",
         ],
-        FileGroupForCi.SETUP_FILES: [
-            r"^pyproject.toml",
+        FileGroupForCi.DEPENDENCY_FILES: [
             r"^generated/provider_dependencies.json$",
         ],
         FileGroupForCi.DOC_FILES: [
@@ -374,6 +374,8 @@ class SelectiveChecks:
         self._github_repository = github_repository
         self._github_actor = github_actor
         self._github_context_dict = github_context_dict or {}
+        self._new_toml: dict[str, Any] = {}
+        self._old_toml: dict[str, Any] = {}
 
     def __important_attributes(self) -> tuple[Any, ...]:
         return tuple(getattr(self, f) for f in self.__HASHABLE_FIELDS)
@@ -427,6 +429,13 @@ class SelectiveChecks:
         ):
             get_console().print("[warning]Running everything because env files 
changed[/]")
             return True
+        if self.pyproject_toml_changed:
+            if self.build_system_changed_in_pyproject_toml:
+                get_console().print("[warning]Running everything: build-system 
changed in pyproject.toml[/]")
+                return True
+            if self.dependencies_changed_in_pyproject_toml:
+                get_console().print("[warning]Running everything: dependencies 
changed in pyproject.toml[/]")
+                return True
         if FULL_TESTS_NEEDED_LABEL in self._pr_labels:
             get_console().print(
                 "[warning]Full tests needed because "
@@ -808,18 +817,123 @@ class SelectiveChecks:
     def basic_checks_only(self) -> bool:
         return not self.ci_image_build
 
+    @staticmethod
+    def _print_diff(old_lines: list[str], new_lines: list[str]):
+        diff = "\n".join([line for line in difflib.ndiff(old_lines, new_lines) 
if line and line[0] in "+-?"])
+        get_console().print(diff)
+
+    @cached_property
+    def pyproject_toml_changed(self) -> bool:
+        if not self._commit_ref:
+            get_console().print("[warning]Cannot determine pyproject.toml 
changes as commit is missing[/]")
+            return False
+        new_result = run_command(
+            ["git", "show", f"{self._commit_ref}:pyproject.toml"],
+            capture_output=True,
+            text=True,
+            cwd=AIRFLOW_SOURCES_ROOT,
+            check=False,
+        )
+        if new_result.returncode != 0:
+            get_console().print(
+                f"[warning]Cannot determine pyproject.toml changes. "
+                f"Could not get pyproject.toml from {self._commit_ref}[/]"
+            )
+            return False
+        old_result = run_command(
+            ["git", "show", f"{self._commit_ref}^:pyproject.toml"],
+            capture_output=True,
+            text=True,
+            cwd=AIRFLOW_SOURCES_ROOT,
+            check=False,
+        )
+        if old_result.returncode != 0:
+            get_console().print(
+                f"[warning]Cannot determine pyproject.toml changes. "
+                f"Could not get pyproject.toml from {self._commit_ref}^[/]"
+            )
+            return False
+        try:
+            import tomllib
+        except ImportError:
+            import tomli as tomllib
+
+        self._new_toml = tomllib.loads(new_result.stdout)
+        self._old_toml = tomllib.loads(old_result.stdout)
+        return True
+
+    @cached_property
+    def build_system_changed_in_pyproject_toml(self) -> bool:
+        if not self.pyproject_toml_changed:
+            return False
+        new_build_backend = self._new_toml["build-system"]["build-backend"]
+        old_build_backend = self._old_toml["build-system"]["build-backend"]
+        if new_build_backend != old_build_backend:
+            get_console().print("[warning]Build backend changed in 
pyproject.toml [/]")
+            self._print_diff([old_build_backend], [new_build_backend])
+            return True
+        new_requires = self._new_toml["build-system"]["requires"]
+        old_requires = self._old_toml["build-system"]["requires"]
+        if new_requires != old_requires:
+            get_console().print("[warning]Build system changed in 
pyproject.toml [/]")
+            self._print_diff(old_requires, new_requires)
+            return True
+        return False
+
+    @cached_property
+    def dependencies_changed_in_pyproject_toml(self) -> bool:
+        if not self.pyproject_toml_changed:
+            return False
+        new_deps = self._new_toml["project"]["dependencies"]
+        old_deps = self._old_toml["project"]["dependencies"]
+        if new_deps != old_deps:
+            get_console().print("[warning]Project dependencies changed [/]")
+            self._print_diff(old_deps, new_deps)
+            return True
+        new_optional_deps = 
self._new_toml["project"].get("optional-dependencies", {})
+        old_optional_deps = 
self._old_toml["project"].get("optional-dependencies", {})
+        if new_optional_deps != old_optional_deps:
+            get_console().print("[warning]Optional dependencies changed [/]")
+            all_dep_keys = 
set(new_optional_deps.keys()).union(old_optional_deps.keys())
+            for dep in all_dep_keys:
+                if new_optional_deps.get(dep) != old_optional_deps.get(dep):
+                    get_console().print(f"[warning]Optional dependency {dep} 
changed[/]")
+                    self._print_diff(old_optional_deps.get(dep, []), 
new_optional_deps.get(dep, []))
+            return True
+        return False
+
     @cached_property
     def upgrade_to_newer_dependencies(self) -> bool:
-        return (
+        if (
             len(
                 self._matching_files(
-                    FileGroupForCi.SETUP_FILES, CI_FILE_GROUP_MATCHES, 
CI_FILE_GROUP_EXCLUDES
+                    FileGroupForCi.DEPENDENCY_FILES, CI_FILE_GROUP_MATCHES, 
CI_FILE_GROUP_EXCLUDES
                 )
             )
             > 0
-            or self._github_event in [GithubEvents.PUSH, GithubEvents.SCHEDULE]
-            or UPGRADE_TO_NEWER_DEPENDENCIES_LABEL in self._pr_labels
-        )
+        ):
+            get_console().print("[warning]Upgrade to newer dependencies: 
Dependency files changed[/]")
+            return True
+        if self.dependencies_changed_in_pyproject_toml:
+            get_console().print(
+                "[warning]Upgrade to newer dependencies: Dependencies changed 
in pyproject.toml[/]"
+            )
+            return True
+        if self.build_system_changed_in_pyproject_toml:
+            get_console().print(
+                "[warning]Upgrade to newer dependencies: Build system changed 
in pyproject.toml[/]"
+            )
+            return True
+        if self._github_event in [GithubEvents.PUSH, GithubEvents.SCHEDULE]:
+            get_console().print("[warning]Upgrade to newer dependencies: Push 
or Schedule event[/]")
+            return True
+        if UPGRADE_TO_NEWER_DEPENDENCIES_LABEL in self._pr_labels:
+            get_console().print(
+                f"[warning]Upgrade to newer dependencies: Label 
'{UPGRADE_TO_NEWER_DEPENDENCIES_LABEL}' "
+                f"in {self._pr_labels}[/]"
+            )
+            return True
+        return False
 
     @cached_property
     def docs_list_as_string(self) -> str | None:
diff --git a/dev/breeze/tests/test_selective_checks.py 
b/dev/breeze/tests/test_selective_checks.py
index 7fc620c1f7..1276e625db 100644
--- a/dev/breeze/tests/test_selective_checks.py
+++ b/dev/breeze/tests/test_selective_checks.py
@@ -426,7 +426,7 @@ def assert_outputs_are_printed(expected_outputs: dict[str, 
str], stderr: str):
         ),
         (
             pytest.param(
-                ("pyproject.toml",),
+                ("generated/provider_dependencies.json",),
                 {
                     "affected-providers-list-as-string": 
ALL_PROVIDERS_AFFECTED,
                     "all-python-versions": "['3.8', '3.9', '3.10', '3.11']",
@@ -595,6 +595,53 @@ def test_expected_output_pull_request_main(
     assert_outputs_are_printed(expected_outputs, str(stderr))
 
 
[email protected](
+    "files, commit_ref, expected_outputs",
+    [
+        (
+            pytest.param(
+                ("pyproject.toml",),
+                "2bc8e175b3a4cc84fe33e687f1a00d2a49563090",
+                {
+                    "full-tests-needed": "false",
+                },
+                id="No full tests needed when pyproject.toml changes in 
insignificant way",
+            )
+        ),
+        (
+            pytest.param(
+                ("pyproject.toml",),
+                "90e2b12d6b99d2f7db43e45f5e8b97d3b8a43b36",
+                {
+                    "full-tests-needed": "true",
+                },
+                id="Full tests needed when only dependencies change in 
pyproject.toml",
+            )
+        ),
+        (
+            pytest.param(
+                ("pyproject.toml",),
+                "c381fdaff42bbda480eee70fb15c5b26a2a3a77d",
+                {
+                    "full-tests-needed": "true",
+                },
+                id="Full tests needed when build-system changes in 
pyproject.toml",
+            )
+        ),
+    ],
+)
+def test_full_test_needed_when_pyproject_toml_changes(
+    files: tuple[str, ...], commit_ref: str, expected_outputs: dict[str, str]
+):
+    stderr = SelectiveChecks(
+        files=files,
+        github_event=GithubEvents.PULL_REQUEST,
+        commit_ref=commit_ref,
+        default_branch="main",
+    )
+    assert_outputs_are_printed(expected_outputs, str(stderr))
+
+
 @pytest.mark.parametrize(
     "files, pr_labels, default_branch, expected_outputs,",
     [
@@ -1149,7 +1196,7 @@ def 
test_no_commit_provided_trigger_full_build_for_any_event_type(github_event):
 
 
 @pytest.mark.parametrize(
-    "files, expected_outputs, pr_labels",
+    "files, expected_outputs, pr_labels, commit_ref",
     [
         pytest.param(
             ("airflow/models/dag.py",),
@@ -1157,15 +1204,36 @@ def 
test_no_commit_provided_trigger_full_build_for_any_event_type(github_event):
                 "upgrade-to-newer-dependencies": "false",
             },
             (),
+            None,
             id="Regular source changed",
         ),
+        pytest.param(
+            ("pyproject.toml",),
+            {
+                "upgrade-to-newer-dependencies": "false",
+            },
+            (),
+            # In this commit only ruff configuration changed
+            "2bc8e175b3a4cc84fe33e687f1a00d2a49563090",
+            id="pyproject.toml changed but no dependency change",
+        ),
+        pytest.param(
+            ("pyproject.toml",),
+            {
+                "upgrade-to-newer-dependencies": "true",
+            },
+            (),
+            "90e2b12d6b99d2f7db43e45f5e8b97d3b8a43b36",
+            id="pyproject.toml changed with optional dependencies changed",
+        ),
         pytest.param(
             ("pyproject.toml",),
             {
                 "upgrade-to-newer-dependencies": "true",
             },
             (),
-            id="pyproject.toml changed",
+            "74baebe5e774ac575fe3a49291996473b1daa789",
+            id="pyproject.toml changed with core dependencies changed",
         ),
         pytest.param(
             ("airflow/providers/microsoft/azure/provider.yaml",),
@@ -1173,6 +1241,7 @@ def 
test_no_commit_provided_trigger_full_build_for_any_event_type(github_event):
                 "upgrade-to-newer-dependencies": "false",
             },
             (),
+            None,
             id="Provider.yaml changed",
         ),
         pytest.param(
@@ -1181,6 +1250,7 @@ def 
test_no_commit_provided_trigger_full_build_for_any_event_type(github_event):
                 "upgrade-to-newer-dependencies": "true",
             },
             (),
+            "None",
             id="Generated provider_dependencies changed",
         ),
         pytest.param(
@@ -1189,16 +1259,20 @@ def 
test_no_commit_provided_trigger_full_build_for_any_event_type(github_event):
                 "upgrade-to-newer-dependencies": "true",
             },
             ("upgrade to newer dependencies",),
+            None,
             id="Regular source changed",
         ),
     ],
 )
 def test_upgrade_to_newer_dependencies(
-    files: tuple[str, ...], expected_outputs: dict[str, str], pr_labels: 
tuple[str, ...]
+    files: tuple[str, ...],
+    expected_outputs: dict[str, str],
+    pr_labels: tuple[str, ...],
+    commit_ref: str | None,
 ):
     stderr = SelectiveChecks(
         files=files,
-        commit_ref="HEAD",
+        commit_ref=commit_ref,
         github_event=GithubEvents.PULL_REQUEST,
         default_branch="main",
         pr_labels=pr_labels,

Reply via email to