This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new dba47277f30 fix: correct airflowignore negation pattern handling for directory-only patterns (#62860)
dba47277f30 is described below

commit dba47277f309255c7d40b1001578411d5cb29e2b
Author: Yoann <[email protected]>
AuthorDate: Wed Mar 4 06:05:34 2026 -0800

    fix: correct airflowignore negation pattern handling for directory-only patterns (#62860)
    
    Directory-only patterns (ending with /) in .airflowignore were incorrectly
    matching files inside the directory, not just the directory itself. This caused
    negation patterns like !abc/def/ to un-ignore all files within abc/def/ instead
    of only un-ignoring the directory for traversal purposes.
    
    Added dir_only flag to _GlobIgnoreRule that skips file matching for
    directory-only patterns, consistent with gitignore specification.
    
    Closes: #62716
---
 .../module_loading/file_discovery.py               | 18 ++++++++--
 .../tests/module_loading/test_file_discovery.py    | 41 ++++++++++++++++++++++
 2 files changed, 57 insertions(+), 2 deletions(-)

diff --git 
a/shared/module_loading/src/airflow_shared/module_loading/file_discovery.py 
b/shared/module_loading/src/airflow_shared/module_loading/file_discovery.py
index 8b1536544f8..523dc8d35f1 100644
--- a/shared/module_loading/src/airflow_shared/module_loading/file_discovery.py
+++ b/shared/module_loading/src/airflow_shared/module_loading/file_discovery.py
@@ -79,6 +79,7 @@ class _GlobIgnoreRule(NamedTuple):
 
     wild_match_pattern: GitWildMatchPattern
     relative_to: Path | None = None
+    dir_only: bool = False
 
     @staticmethod
     def compile(pattern: str, base_dir: Path, definition_file: Path) -> 
_IgnoreRule | None:
@@ -95,8 +96,15 @@ class _GlobIgnoreRule(NamedTuple):
             # > Otherwise the pattern may also match at any level below the 
.gitignore level.
             relative_to = definition_file.parent
 
+        # See https://git-scm.com/docs/gitignore
+        # > If there is a separator at the end of the pattern then the pattern 
will only match
+        # > directories, otherwise the pattern can match both files and 
directories.
+        # Strip the negation prefix before checking for trailing separator.
+        raw_pattern = pattern.lstrip("!")
+        dir_only = raw_pattern.rstrip() != raw_pattern.rstrip().rstrip("/")
+
         ignore_pattern = GitWildMatchPattern(pattern)
-        return _GlobIgnoreRule(wild_match_pattern=ignore_pattern, 
relative_to=relative_to)
+        return _GlobIgnoreRule(wild_match_pattern=ignore_pattern, 
relative_to=relative_to, dir_only=dir_only)
 
     @staticmethod
     def match(path: Path, rules: list[_IgnoreRule]) -> bool:
@@ -105,8 +113,14 @@ class _GlobIgnoreRule(NamedTuple):
         for rule in rules:
             if not isinstance(rule, _GlobIgnoreRule):
                 raise ValueError(f"_GlobIgnoreRule cannot match rules of type: 
{type(rule)}")
+            # See https://git-scm.com/docs/gitignore
+            # > If there is a separator at the end of the pattern then the 
pattern will only match
+            # > directories, otherwise the pattern can match both files and 
directories.
+            is_dir = path.is_dir()
+            if rule.dir_only and not is_dir:
+                continue
             rel_obj = path.relative_to(rule.relative_to) if rule.relative_to 
else Path(path.name)
-            if path.is_dir():
+            if is_dir:
                 rel_path = f"{rel_obj.as_posix()}/"
             else:
                 rel_path = rel_obj.as_posix()
diff --git a/shared/module_loading/tests/module_loading/test_file_discovery.py 
b/shared/module_loading/tests/module_loading/test_file_discovery.py
index 0c5347dca1d..a745f47fce6 100644
--- a/shared/module_loading/tests/module_loading/test_file_discovery.py
+++ b/shared/module_loading/tests/module_loading/test_file_discovery.py
@@ -137,3 +137,44 @@ class TestFindPathFromDirectory:
                 detected.add(p.relative_to(dags_root).as_posix())
 
         assert detected == {"a/b/subfolder/keep.py"}
+
+    def 
test_airflowignore_negation_directory_only_patterns_do_not_unignore_files(self, 
tmp_path):
+        """Directory-only negation patterns should only unignore directories, 
not files inside them.
+
+        Regression test for https://github.com/apache/airflow/issues/62716
+
+        Patterns:
+          *                     -> ignore everything
+          !abc/                 -> unignore abc dir (for traversal), NOT its 
contents
+          !abc/def/             -> unignore abc/def dir (for traversal), NOT 
its contents
+          !abc/def/xyz/         -> unignore abc/def/xyz dir (for traversal), 
NOT its contents
+          !abc/def/xyz/*        -> unignore contents of abc/def/xyz
+        """
+        dags_root = tmp_path / "dags"
+        (dags_root / "abc" / "def" / "xyz").mkdir(parents=True)
+
+        # files at various levels – only xyz_dag.py should be discovered
+        (dags_root / "root_dag.py").write_text("raise Exception('ignored')\n")
+        (dags_root / "abc" / "abc_dag.py").write_text("raise 
Exception('ignored')\n")
+        (dags_root / "abc" / "def" / "def_dag.py").write_text("raise 
Exception('ignored')\n")
+        (dags_root / "abc" / "def" / "xyz" / "xyz_dag.py").write_text("# 
should be discovered\n")
+
+        (dags_root / ".airflowignore").write_text(
+            "\n".join(
+                [
+                    "*",
+                    "!abc/",
+                    "!abc/def/",
+                    "!abc/def/xyz/",
+                    "!abc/def/xyz/*",
+                ]
+            )
+        )
+
+        detected = set()
+        for raw in find_path_from_directory(dags_root, ".airflowignore", 
"glob"):
+            p = Path(raw)
+            if p.is_file() and p.suffix == ".py":
+                detected.add(p.relative_to(dags_root).as_posix())
+
+        assert detected == {"abc/def/xyz/xyz_dag.py"}

Reply via email to