This is an automated email from the ASF dual-hosted git repository.
shahar1 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 36356cc0d48 Don't force the full test matrix for large test/docs-only
PRs (#68059)
36356cc0d48 is described below
commit 36356cc0d48c63e565615d8936c4cd0bd47fc419
Author: Shahar Epstein <[email protected]>
AuthorDate: Fri Jun 5 19:40:23 2026 +0300
Don't force the full test matrix for large test/docs-only PRs (#68059)
The "large PR" heuristic escalates a PR to the full test matrix on two
gates: number of changed files (>=25) and lines of production code changed
(>=500). The line-count gate was already narrowed to count production code
only, so test/docs/translation/example-DAG churn does not trigger it. The
file-count gate, however, still counted every changed file (minus
newsfragments and lockfiles), so a PR touching nothing but 25+ test, docs,
or example-DAG files still forced the full ~135-job matrix.
Base the file-count gate on the same production-file set the line-count
gate uses, so both heuristics agree on what "production code" means. A PR
that only touches tests, docs, translations, or example DAGs no longer
forces the full matrix on its file count.
Co-authored-by: Claude Opus 4.8 <[email protected]>
---
.../src/airflow_breeze/utils/selective_checks.py | 64 +++++++++-------------
dev/breeze/tests/test_selective_checks.py | 36 ++++++++++++
2 files changed, 62 insertions(+), 38 deletions(-)
diff --git a/dev/breeze/src/airflow_breeze/utils/selective_checks.py
b/dev/breeze/src/airflow_breeze/utils/selective_checks.py
index 373cf7c8e39..d0893c1e6ff 100644
--- a/dev/breeze/src/airflow_breeze/utils/selective_checks.py
+++ b/dev/breeze/src/airflow_breeze/utils/selective_checks.py
@@ -749,14 +749,13 @@ class SelectiveChecks:
"""
Check if PR is large enough to run full tests.
- The heuristics are based on number of files changed and total lines
changed,
- while excluding generated files which can be ignored.
-
- The line-count check (``LINE_THRESHOLD``) only counts lines in
production-code
- files — tests, docs, newsfragments, generated files, translations, dev
tooling,
- and similar low-risk paths do not contribute to the line count. A
1000-line test
- or docs PR is not the same shape of risk as a 1000-line change to
scheduler
- code, and only the latter should trigger the full test matrix.
+ Both heuristics — the count of changed files (``FILE_THRESHOLD``) and
the
+ total lines changed (``LINE_THRESHOLD``) — only consider
production-code
+ files. Tests, docs, newsfragments, generated files, translations,
example
+ DAGs, and dev tooling are low-risk: a PR that only touches them,
however
+ many files or lines, must not force the full test matrix. A 1000-line
(or
+ 40-file) test or docs PR is not the same shape of risk as the same
churn in
+ scheduler code, and only the latter should trigger the full test
matrix.
"""
FILE_THRESHOLD = 25
LINE_THRESHOLD = 500
@@ -764,34 +763,12 @@ class SelectiveChecks:
if not self._files:
return False
- exclude_patterns = [
- r"/newsfragments/",
- r"^uv\.lock$",
- r"pnpm-lock\.yaml$",
- r"package-lock\.json$",
- ]
-
- relevant_files = [
- f for f in self._files if not any(re.search(pattern, f) for
pattern in exclude_patterns)
- ]
-
- files_changed = len(relevant_files)
- if files_changed >= FILE_THRESHOLD:
- console_print(
- f"[warning]Running full set of tests because PR touches
{files_changed} files "
- f"(≥25 threshold)[/]"
- )
- return True
-
- if not self._commit_ref:
- console_print("[warning]Cannot determine if PR is big enough,
skipping the check[/]")
- return False
-
- # The line-count gate only counts churn in production code. We compose
- # the existing `*_PRODUCTION_FILES` and helm groups rather than rolling
- # a bespoke pattern set, so the definition of "production code" stays
- # in lockstep with the rest of CI (e.g. SAST scans targeted by
- # `run_python_scans` / `run_javascript_scans`).
+ # Both gates count churn in production code only. We compose the
existing
+ # `*_PRODUCTION_FILES` and helm groups rather than rolling a bespoke
pattern
+ # set, so the definition of "production code" stays in lockstep with
the rest
+ # of CI (e.g. SAST scans targeted by `run_python_scans` /
+ # `run_javascript_scans`). These groups already exclude tests, docs,
+ # generated files, translations, and example DAGs.
production_files = list(
dict.fromkeys(
self._matching_files(FileGroupForCi.PYTHON_PRODUCTION_FILES,
CI_FILE_GROUP_MATCHES)
@@ -802,6 +779,18 @@ class SelectiveChecks:
if not production_files:
return False
+ files_changed = len(production_files)
+ if files_changed >= FILE_THRESHOLD:
+ console_print(
+ f"[warning]Running full set of tests because PR touches
{files_changed} "
+ f"production files (≥{FILE_THRESHOLD} threshold)[/]"
+ )
+ return True
+
+ if not self._commit_ref:
+ console_print("[warning]Cannot determine if PR is big enough,
skipping the check[/]")
+ return False
+
try:
result = run_command(
["git", "diff", "--numstat",
f"{self._commit_ref}^...{self._commit_ref}"] + production_files,
@@ -826,8 +815,7 @@ class SelectiveChecks:
if total_lines >= LINE_THRESHOLD:
console_print(
f"[warning]Running full set of tests because PR
changes {total_lines} lines "
- f"of production code in {len(production_files)}
file(s) "
- f"(of {files_changed} relevant file(s))[/]"
+ f"of production code in {len(production_files)}
file(s)[/]"
)
return True
except Exception:
diff --git a/dev/breeze/tests/test_selective_checks.py
b/dev/breeze/tests/test_selective_checks.py
index cf2b7b99344..9ba1ada757a 100644
--- a/dev/breeze/tests/test_selective_checks.py
+++ b/dev/breeze/tests/test_selective_checks.py
@@ -3634,6 +3634,42 @@ def
test_provider_dependency_bump_check_in_optional_dependencies(mock_run_comman
},
id="PR with only lock files changed",
),
+ # The file-count gate, like the line-count gate, only counts production
+ # code. A PR that touches many test, docs, or example-DAG files — and
no
+ # production code — must not force the full matrix on its file count
alone.
+ pytest.param(
+ tuple(f"airflow-core/tests/unit/models/test_file{i}.py" for i in
range(30)),
+ {
+ "full-tests-needed": "false",
+ },
+ id="Large test-only PR (30 files) does not trigger full tests",
+ ),
+ pytest.param(
+ tuple(f"airflow-core/docs/page_{i}.rst" for i in range(30)),
+ {
+ "full-tests-needed": "false",
+ },
+ id="Large docs-only PR (30 files) does not trigger full tests",
+ ),
+ pytest.param(
+ tuple(f"airflow-core/src/airflow/example_dags/example_{i}.py" for
i in range(30)),
+ {
+ "full-tests-needed": "false",
+ },
+ id="Large example_dags-only PR (30 files) does not trigger full
tests",
+ ),
+ # A mix below the production-file threshold (20 production + 20 test
files)
+ # must not trip the file-count gate on the combined count of 40.
+ pytest.param(
+ tuple(
+ [f"airflow-core/src/airflow/models/file{i}.py" for i in
range(20)]
+ + [f"airflow-core/tests/unit/models/test_file{i}.py" for i in
range(20)]
+ ),
+ {
+ "full-tests-needed": "false",
+ },
+ id="Mixed PR with 20 production files (of 40) does not trigger on
file count",
+ ),
],
)
def test_large_pr_by_file_count(files, expected_outputs: dict[str, str]):