This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 4adf4e62f95 Don't force the full test matrix for large example_dags-only changes (#68042)
4adf4e62f95 is described below

commit 4adf4e62f95dadc8db9680da68ebb398fa877ebe
Author: Shahar Epstein <[email protected]>
AuthorDate: Fri Jun 5 06:15:26 2026 +0300

    Don't force the full test matrix for large example_dags-only changes (#68042)
    
    A large diff to example DAGs (e.g. a single provider example like
    apache/airflow#68037, +667/-119) tripped the `_is_large_enough_pr`
    line-count gate, which set `full-tests-needed=true` and fanned out the
    entire matrix — core DB tests, Kubernetes, Helm, PROD images, all-provider
    compat and special tests — for what is illustrative, non-shipped code.
    
    Exclude `example_dags/` from `PYTHON_PRODUCTION_FILES` (the "production
    code" definition that feeds the line-count gate) for both the airflow-core
    top-level `airflow/example_dags/` and the nested
    `providers/<name>/.../example_dags/` layout. Example DAGs are still selected
    for their own tests via the broader `ALL_AIRFLOW_PYTHON_FILES` /
    `ALL_PROVIDERS_PYTHON_FILES` groups, so they keep running the relevant
    core/provider tests — they just no longer force the full matrix.
---
 .../src/airflow_breeze/utils/selective_checks.py   | 12 +++++++--
 dev/breeze/tests/test_selective_checks.py          | 31 ++++++++++++++++++++++
 2 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/dev/breeze/src/airflow_breeze/utils/selective_checks.py 
b/dev/breeze/src/airflow_breeze/utils/selective_checks.py
index 3514c558476..2c40be9364d 100644
--- a/dev/breeze/src/airflow_breeze/utils/selective_checks.py
+++ b/dev/breeze/src/airflow_breeze/utils/selective_checks.py
@@ -220,10 +220,18 @@ CI_FILE_GROUP_MATCHES: HashableDict[FileGroupForCi] = 
HashableDict(
             # `run_python_scans` (SAST/SCA target) and the line-threshold check
             # in `_is_large_enough_pr` to decide whether a PR's diff is large
             # enough to force the full test matrix.
-            
r"^airflow-core/src/airflow/(?!.*/(?:openapi-gen|i18n/locales)/).*\.py$",
+            #
+            # `example_dags/` are illustrative, not shipped runtime code, so a 
large
+            # example-DAG diff must not force the full matrix. They are still 
selected
+            # for their own tests via the broader `ALL_AIRFLOW_PYTHON_FILES` /
+            # `ALL_PROVIDERS_PYTHON_FILES` groups, so excluding them here only 
affects
+            # the line-count gate (and SAST target), not test selection. The
+            # `(?:.*/)?` covers both airflow-core's top-level 
`airflow/example_dags/`
+            # and the nested `providers/<name>/.../example_dags/` layout.
+            
r"^airflow-core/src/airflow/(?!(?:.*/)?example_dags/)(?!.*/(?:openapi-gen|i18n/locales)/).*\.py$",
             r"^task-sdk/src/airflow/(?!.*_generated\.py$).*\.py$",
             r"^airflow-ctl/src/airflowctl/(?!.*generated\.py$).*\.py$",
-            r"^providers/(?:[^/]+/)+src/.*\.py$",
+            r"^providers/(?:[^/]+/)+src/(?!(?:.*/)?example_dags/).*\.py$",
             r"^shared/[^/]+/src/.*\.py$",
             r"^pyproject\.toml$",
             r"^hatch_build\.py$",
diff --git a/dev/breeze/tests/test_selective_checks.py 
b/dev/breeze/tests/test_selective_checks.py
index 90ba901683a..6f6177be26d 100644
--- a/dev/breeze/tests/test_selective_checks.py
+++ b/dev/breeze/tests/test_selective_checks.py
@@ -3619,6 +3619,37 @@ def test_large_pr_by_file_count(files, expected_outputs: 
dict[str, str]):
             },
             id="Mixed PR with only 200 production lines does not trigger (test 
lines excluded)",
         ),
+        # A large example-DAG diff in a "plain" provider (not standard/git, 
which
+        # have their own full-tests rule) must NOT force the full matrix. This 
is
+        # the exact shape of apache/airflow#68037.
+        pytest.param(
+            (
+                
"providers/common/ai/src/airflow/providers/common/ai/example_dags/example_aip_progress_tracker.py",
+            ),
+            
"600\t600\tproviders/common/ai/src/airflow/providers/common/ai/example_dags/example_aip_progress_tracker.py",
+            {
+                "full-tests-needed": "false",
+            },
+            id="Large provider example_dags-only PR does not trigger full 
tests",
+        ),
+        pytest.param(
+            ("airflow-core/src/airflow/example_dags/example_complex.py",),
+            
"600\t600\tairflow-core/src/airflow/example_dags/example_complex.py",
+            {
+                "full-tests-needed": "false",
+            },
+            id="Large airflow-core example_dags-only PR does not trigger full 
tests",
+        ),
+        # Regression guard: a large *non-example* file in another plain provider
+        # must still count as production code and trigger the full matrix.
+        pytest.param(
+            
("providers/arangodb/src/airflow/providers/arangodb/operators/arangodb.py",),
+            
"600\t600\tproviders/arangodb/src/airflow/providers/arangodb/operators/arangodb.py",
+            {
+                "full-tests-needed": "true",
+            },
+            id="Large provider production (non-example) PR still triggers full 
tests",
+        ),
     ],
 )
 def test_large_pr_by_line_count(files, git_diff_output, expected_outputs: 
dict[str, str]):

Reply via email to