This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 4adf4e62f95 Don't force the full test matrix for large
example_dags-only changes (#68042)
4adf4e62f95 is described below
commit 4adf4e62f95dadc8db9680da68ebb398fa877ebe
Author: Shahar Epstein <[email protected]>
AuthorDate: Fri Jun 5 06:15:26 2026 +0300
Don't force the full test matrix for large example_dags-only changes
(#68042)
A large diff to example DAGs (e.g. a single provider example like
apache/airflow#68037, +667/-119) tripped the `_is_large_enough_pr`
line-count gate, which set `full-tests-needed=true` and fanned out the
entire matrix — core DB tests, Kubernetes, Helm, PROD images, all-provider
compat and special tests — for what is illustrative, non-shipped code.
Exclude `example_dags/` from `PYTHON_PRODUCTION_FILES` (the "production
code" definition that feeds the line-count gate) for both the airflow-core
top-level `airflow/example_dags/` and the nested
`providers/<name>/.../example_dags/` layout. Example DAGs are still selected
for their own tests via the broader `ALL_AIRFLOW_PYTHON_FILES` /
`ALL_PROVIDERS_PYTHON_FILES` groups, so they keep running the relevant
core/provider tests — they just no longer force the full matrix. Because the
same file group also defines the `run_python_scans` (SAST/SCA) target,
example DAGs are excluded from that target as well.
---
.../src/airflow_breeze/utils/selective_checks.py | 12 +++++++--
dev/breeze/tests/test_selective_checks.py | 31 ++++++++++++++++++++++
2 files changed, 41 insertions(+), 2 deletions(-)
diff --git a/dev/breeze/src/airflow_breeze/utils/selective_checks.py
b/dev/breeze/src/airflow_breeze/utils/selective_checks.py
index 3514c558476..2c40be9364d 100644
--- a/dev/breeze/src/airflow_breeze/utils/selective_checks.py
+++ b/dev/breeze/src/airflow_breeze/utils/selective_checks.py
@@ -220,10 +220,18 @@ CI_FILE_GROUP_MATCHES: HashableDict[FileGroupForCi] =
HashableDict(
# `run_python_scans` (SAST/SCA target) and the line-threshold check
# in `_is_large_enough_pr` to decide whether a PR's diff is large
# enough to force the full test matrix.
-
r"^airflow-core/src/airflow/(?!.*/(?:openapi-gen|i18n/locales)/).*\.py$",
+ #
+ # Example DAGs under `example_dags/` are illustrative, not shipped
+ # runtime code, so a large example-DAG diff must not force the full
+ # matrix. They are still selected for their own tests via the broader
+ # `ALL_AIRFLOW_PYTHON_FILES` / `ALL_PROVIDERS_PYTHON_FILES` groups, so
+ # excluding them here only affects the line-count gate (and SAST
+ # target), not test selection. The `(?:.*/)?` covers both airflow-core's
+ # top-level `airflow/example_dags/` and the nested
+ # `providers/<name>/.../example_dags/` layout.
+
r"^airflow-core/src/airflow/(?!(?:.*/)?example_dags/)(?!.*/(?:openapi-gen|i18n/locales)/).*\.py$",
r"^task-sdk/src/airflow/(?!.*_generated\.py$).*\.py$",
r"^airflow-ctl/src/airflowctl/(?!.*generated\.py$).*\.py$",
- r"^providers/(?:[^/]+/)+src/.*\.py$",
+ r"^providers/(?:[^/]+/)+src/(?!(?:.*/)?example_dags/).*\.py$",
r"^shared/[^/]+/src/.*\.py$",
r"^pyproject\.toml$",
r"^hatch_build\.py$",
diff --git a/dev/breeze/tests/test_selective_checks.py
b/dev/breeze/tests/test_selective_checks.py
index 90ba901683a..6f6177be26d 100644
--- a/dev/breeze/tests/test_selective_checks.py
+++ b/dev/breeze/tests/test_selective_checks.py
@@ -3619,6 +3619,37 @@ def test_large_pr_by_file_count(files, expected_outputs:
dict[str, str]):
},
id="Mixed PR with only 200 production lines does not trigger (test
lines excluded)",
),
+ # A large example-DAG diff in a "plain" provider (not standard/git, which
+ # have their own full-tests rule) must NOT force the full matrix. This is
+ # the exact shape of apache/airflow#68037.
+ pytest.param(
+ (
+
"providers/common/ai/src/airflow/providers/common/ai/example_dags/example_aip_progress_tracker.py",
+ ),
+
"600\t600\tproviders/common/ai/src/airflow/providers/common/ai/example_dags/example_aip_progress_tracker.py",
+ {
+ "full-tests-needed": "false",
+ },
+ id="Large provider example_dags-only PR does not trigger full
tests",
+ ),
+ pytest.param(
+ ("airflow-core/src/airflow/example_dags/example_complex.py",),
+
"600\t600\tairflow-core/src/airflow/example_dags/example_complex.py",
+ {
+ "full-tests-needed": "false",
+ },
+ id="Large airflow-core example_dags-only PR does not trigger full
tests",
+ ),
+ # Regression guard: a large *non-example* file in a plain provider
+ # must still count as production code and trigger the full matrix.
+ pytest.param(
+
("providers/arangodb/src/airflow/providers/arangodb/operators/arangodb.py",),
+
"600\t600\tproviders/arangodb/src/airflow/providers/arangodb/operators/arangodb.py",
+ {
+ "full-tests-needed": "true",
+ },
+ id="Large provider production (non-example) PR still triggers full
tests",
+ ),
],
)
def test_large_pr_by_line_count(files, git_diff_output, expected_outputs:
dict[str, str]):