uranusjr commented on code in PR #22051:
URL: https://github.com/apache/airflow/pull/22051#discussion_r848830395
##########
airflow/utils/file.py:
##########
@@ -108,46 +194,77 @@ def open_maybe_zipped(fileloc, mode='r'):
return open(fileloc, mode=mode)
-def find_path_from_directory(base_dir_path: str, ignore_file_name: str) -> Generator[str, None, None]:
+def _find_path_from_directory(
+ base_dir_path: str,
+ ignore_file_name: str,
+ ignore_rule_type: Type[_IgnoreRule],
+) -> Generator[str, None, None]:
"""
- Search the file and return the path of the file that should not be ignored.
- :param base_dir_path: the base path to be searched for.
- :param ignore_file_name: the file name in which specifies a regular expression pattern is written.
+ Recursively search the base path and return the list of file paths that should not be ignored by
+ regular expressions in any ignore files at each directory level.
+ :param base_dir_path: the base path to be searched
+ :param ignore_file_name: the file name containing regular expressions for files that should be ignored.
+ :param ignore_rule_type: the concrete class for ignore rules, which implements the _IgnoreRule interface.
- :return : file path not to be ignored.
+ :return : a generator of file paths which should not be ignored.
"""
- patterns_by_dir: Dict[str, List[Pattern[str]]] = {}
-
- for root, dirs, files in os.walk(str(base_dir_path), followlinks=True):
- patterns: List[Pattern[str]] = patterns_by_dir.get(root, [])
-
- ignore_file_path = os.path.join(root, ignore_file_name)
- if os.path.isfile(ignore_file_path):
- with open(ignore_file_path) as file:
- lines_no_comments = [re.sub(r"\s*#.*", "", line) for line in file.read().split("\n")]
- patterns += [re.compile(line) for line in lines_no_comments if line]
- patterns = list(set(patterns))
-
- dirs[:] = [
- subdir
- for subdir in dirs
- if not any(
- p.search(os.path.join(os.path.relpath(root, str(base_dir_path)), subdir)) for p in patterns
- )
- ]
-
- patterns_by_dir.update({os.path.join(root, sd): patterns.copy() for sd in dirs})
-
- for file in files: # type: ignore
+ patterns_by_dir: Dict[Path, List[_IgnoreRule]] = {}
+
+ for root, dirs, files in os.walk(base_dir_path, followlinks=True):
+ patterns: List[_IgnoreRule] = patterns_by_dir.get(Path(root), [])
+
+ ignore_file_path = Path(root) / ignore_file_name
+ if ignore_file_path.is_file():
+ with open(ignore_file_path) as ifile:
+ lines_no_comments = [re.sub(r"\s*#.*", "", line) for line in ifile.read().split("\n")]
+ # append new patterns and filter out "None" objects, which are invalid patterns
+ patterns += [
+ p
+ for p in [
+ ignore_rule_type.compile(line, Path(base_dir_path), ignore_file_path)
+ for line in lines_no_comments
+ if line
+ ]
+ if p is not None
+ ]
+ # evaluation order of patterns is important with negation
+ # so that later patterns can override earlier patterns
+ patterns = list(OrderedDict.fromkeys(patterns).keys())
+
+ dirs[:] = [subdir for subdir in dirs if not ignore_rule_type.match(Path(root) / subdir, patterns)]
+
+ patterns_by_dir.update({Path(root) / sd: patterns.copy() for sd in dirs})
+
+ for file in files:
if file == ignore_file_name:
continue
- abs_file_path = os.path.join(root, str(file))
- rel_file_path = os.path.join(os.path.relpath(root, str(base_dir_path)), str(file))
- if any(p.search(rel_file_path) for p in patterns):
+ abs_file_path = Path(root) / file
+ if ignore_rule_type.match(abs_file_path, patterns):
continue
yield str(abs_file_path)
+def find_path_from_directory(
+ base_dir_path: str,
+ ignore_file_name: str,
+ ignore_file_syntax: str = conf.get('core', 'DAG_IGNORE_FILE_SYNTAX', fallback="regexp"),
+) -> Generator[str, None, None]:
+ """
+ Recursively search the base path and return the list of file paths that should not be ignored.
+ :param base_dir_path: the base path to be searched
+ :param ignore_file_name: the file name in which specifies the patterns of files/dirs to be ignored
+ :param ignore_file_syntax: the syntax of patterns in the ignore file: regexp or glob
+
+ :return : a generator of file paths.
Review Comment:
```suggestion
:return: a generator of file paths.
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]