uranusjr commented on code in PR #22051:
URL: https://github.com/apache/airflow/pull/22051#discussion_r848830395


##########
airflow/utils/file.py:
##########
@@ -108,46 +194,77 @@ def open_maybe_zipped(fileloc, mode='r'):
         return open(fileloc, mode=mode)
 
 
-def find_path_from_directory(base_dir_path: str, ignore_file_name: str) -> Generator[str, None, None]:
+def _find_path_from_directory(
+    base_dir_path: str,
+    ignore_file_name: str,
+    ignore_rule_type: Type[_IgnoreRule],
+) -> Generator[str, None, None]:
     """
-    Search the file and return the path of the file that should not be ignored.
-    :param base_dir_path: the base path to be searched for.
-    :param ignore_file_name: the file name in which specifies a regular expression pattern is written.
+    Recursively search the base path and return the list of file paths that should not be ignored by
+    regular expressions in any ignore files at each directory level.
+    :param base_dir_path: the base path to be searched
+    :param ignore_file_name: the file name containing regular expressions for files that should be ignored.
+    :param ignore_rule_type: the concrete class for ignore rules, which implements the _IgnoreRule interface.
 
-    :return : file path not to be ignored.
+    :return : a generator of file paths which should not be ignored.
     """
-    patterns_by_dir: Dict[str, List[Pattern[str]]] = {}
-
-    for root, dirs, files in os.walk(str(base_dir_path), followlinks=True):
-        patterns: List[Pattern[str]] = patterns_by_dir.get(root, [])
-
-        ignore_file_path = os.path.join(root, ignore_file_name)
-        if os.path.isfile(ignore_file_path):
-            with open(ignore_file_path) as file:
-                lines_no_comments = [re.sub(r"\s*#.*", "", line) for line in file.read().split("\n")]
-                patterns += [re.compile(line) for line in lines_no_comments if line]
-                patterns = list(set(patterns))
-
-        dirs[:] = [
-            subdir
-            for subdir in dirs
-            if not any(
-                p.search(os.path.join(os.path.relpath(root, str(base_dir_path)), subdir)) for p in patterns
-            )
-        ]
-
-        patterns_by_dir.update({os.path.join(root, sd): patterns.copy() for sd in dirs})
-
-        for file in files:  # type: ignore
+    patterns_by_dir: Dict[Path, List[_IgnoreRule]] = {}
+
+    for root, dirs, files in os.walk(base_dir_path, followlinks=True):
+        patterns: List[_IgnoreRule] = patterns_by_dir.get(Path(root), [])
+
+        ignore_file_path = Path(root) / ignore_file_name
+        if ignore_file_path.is_file():
+            with open(ignore_file_path) as ifile:
+                lines_no_comments = [re.sub(r"\s*#.*", "", line) for line in ifile.read().split("\n")]
+                # append new patterns and filter out "None" objects, which are invalid patterns
+                patterns += [
+                    p
+                    for p in [
+                        ignore_rule_type.compile(line, Path(base_dir_path), ignore_file_path)
+                        for line in lines_no_comments
+                        if line
+                    ]
+                    if p is not None
+                ]
+                # evaluation order of patterns is important with negation
+                # so that later patterns can override earlier patterns
+                patterns = list(OrderedDict.fromkeys(patterns).keys())
+
+        dirs[:] = [subdir for subdir in dirs if not ignore_rule_type.match(Path(root) / subdir, patterns)]
+
+        patterns_by_dir.update({Path(root) / sd: patterns.copy() for sd in dirs})
+
+        for file in files:
             if file == ignore_file_name:
                 continue
-            abs_file_path = os.path.join(root, str(file))
-            rel_file_path = os.path.join(os.path.relpath(root, str(base_dir_path)), str(file))
-            if any(p.search(rel_file_path) for p in patterns):
+            abs_file_path = Path(root) / file
+            if ignore_rule_type.match(abs_file_path, patterns):
                 continue
             yield str(abs_file_path)
 
 
+def find_path_from_directory(
+    base_dir_path: str,
+    ignore_file_name: str,
+    ignore_file_syntax: str = conf.get('core', 'DAG_IGNORE_FILE_SYNTAX', fallback="regexp"),
+) -> Generator[str, None, None]:
+    """
+    Recursively search the base path and return the list of file paths that should not be ignored.
+    :param base_dir_path: the base path to be searched
+    :param ignore_file_name: the file name in which specifies the patterns of files/dirs to be ignored
+    :param ignore_file_syntax: the syntax of patterns in the ignore file: regexp or glob
+
+    :return : a generator of file paths.

Review Comment:
   ```suggestion
       :return: a generator of file paths.
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to