https://github.com/python/cpython/commit/3eda1460359c7e6af4c5b125a63388635a3ed477 commit: 3eda1460359c7e6af4c5b125a63388635a3ed477 branch: main author: Bénédikt Tran <10796600+picn...@users.noreply.github.com> committer: picnixz <10796600+picn...@users.noreply.github.com> date: 2025-04-08T10:11:25Z summary:
gh-74598: add `fnmatch.filterfalse` for excluding names matching a pattern (#121185) files: A Misc/NEWS.d/next/Library/2024-06-30-17-00-00.gh-issue-74598.1gVy_8.rst M Doc/library/fnmatch.rst M Doc/whatsnew/3.14.rst M Lib/fnmatch.py M Lib/test/test_fnmatch.py diff --git a/Doc/library/fnmatch.rst b/Doc/library/fnmatch.rst index 5cb47777ae527d..8ebb09f1f0ff4f 100644 --- a/Doc/library/fnmatch.rst +++ b/Doc/library/fnmatch.rst @@ -90,6 +90,16 @@ functions: :func:`fnmatch`, :func:`fnmatchcase`, :func:`.filter`. but implemented more efficiently. +.. function:: filterfalse(names, pat) + + Construct a list from those elements of the :term:`iterable` of filename + strings *names* that do not match the pattern string *pat*. + It is the same as ``[n for n in names if not fnmatch(n, pat)]``, + but implemented more efficiently. + + .. versionadded:: next + + .. function:: translate(pat) Return the shell-style pattern *pat* converted to a regular expression for diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index cc71b3762035c3..ec0d050c84c6f8 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -677,6 +677,13 @@ errno (Contributed by James Roy in :gh:`126585`.) +fnmatch +------- + +* Added :func:`fnmatch.filterfalse` for excluding names matching a pattern. + (Contributed by Bénédikt Tran in :gh:`74598`.) + + fractions --------- diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py index 865baea23467ea..1dee8330f5d9d5 100644 --- a/Lib/fnmatch.py +++ b/Lib/fnmatch.py @@ -9,12 +9,15 @@ The function translate(PATTERN) returns a regular expression corresponding to PATTERN. (It does not compile it.) """ + +import functools +import itertools import os import posixpath import re -import functools -__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"] +__all__ = ["filter", "filterfalse", "fnmatch", "fnmatchcase", "translate"] + def fnmatch(name, pat): """Test whether FILENAME matches PATTERN. 
@@ -35,6 +38,7 @@ def fnmatch(name, pat): pat = os.path.normcase(pat) return fnmatchcase(name, pat) + @functools.lru_cache(maxsize=32768, typed=True) def _compile_pattern(pat): if isinstance(pat, bytes): @@ -45,6 +49,7 @@ def _compile_pattern(pat): res = translate(pat) return re.compile(res).match + def filter(names, pat): """Construct a list from those elements of the iterable NAMES that match PAT.""" result = [] @@ -61,6 +66,22 @@ def filter(names, pat): result.append(name) return result + +def filterfalse(names, pat): + """Construct a list from those elements of the iterable NAMES that do not match PAT.""" + pat = os.path.normcase(pat) + match = _compile_pattern(pat) + if os.path is posixpath: + # normcase on posix is NOP. Optimize it away from the loop. + return list(itertools.filterfalse(match, names)) + + result = [] + for name in names: + if match(os.path.normcase(name)) is None: + result.append(name) + return result + + def fnmatchcase(name, pat): """Test whether FILENAME matches PATTERN, including case. 
@@ -80,9 +101,11 @@ def translate(pat): parts, star_indices = _translate(pat, '*', '.') return _join_translated_parts(parts, star_indices) + _re_setops_sub = re.compile(r'([&~|])').sub _re_escape = functools.lru_cache(maxsize=512)(re.escape) + def _translate(pat, star, question_mark): res = [] add = res.append diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py index 9f360e1dc10f47..d4163cfe782ce0 100644 --- a/Lib/test/test_fnmatch.py +++ b/Lib/test/test_fnmatch.py @@ -1,11 +1,15 @@ """Test cases for the fnmatch module.""" -import unittest import os import string +import unittest import warnings +from fnmatch import fnmatch, fnmatchcase, translate, filter, filterfalse + + +IGNORECASE = os.path.normcase('P') == os.path.normcase('p') +NORMSEP = os.path.normcase('\\') == os.path.normcase('/') -from fnmatch import fnmatch, fnmatchcase, translate, filter class FnmatchTestCase(unittest.TestCase): @@ -77,23 +81,20 @@ def test_bytes(self): self.check_match(b'foo\nbar', b'foo*') def test_case(self): - ignorecase = os.path.normcase('ABC') == os.path.normcase('abc') check = self.check_match check('abc', 'abc') - check('AbC', 'abc', ignorecase) - check('abc', 'AbC', ignorecase) + check('AbC', 'abc', IGNORECASE) + check('abc', 'AbC', IGNORECASE) check('AbC', 'AbC') def test_sep(self): - normsep = os.path.normcase('\\') == os.path.normcase('/') check = self.check_match check('usr/bin', 'usr/bin') - check('usr\\bin', 'usr/bin', normsep) - check('usr/bin', 'usr\\bin', normsep) + check('usr\\bin', 'usr/bin', NORMSEP) + check('usr/bin', 'usr\\bin', NORMSEP) check('usr\\bin', 'usr\\bin') def test_char_set(self): - ignorecase = os.path.normcase('ABC') == os.path.normcase('abc') check = self.check_match tescases = string.ascii_lowercase + string.digits + string.punctuation for c in tescases: @@ -101,11 +102,11 @@ def test_char_set(self): check(c, '[!az]', c not in 'az') # Case insensitive. 
for c in tescases: - check(c, '[AZ]', (c in 'az') and ignorecase) - check(c, '[!AZ]', (c not in 'az') or not ignorecase) + check(c, '[AZ]', (c in 'az') and IGNORECASE) + check(c, '[!AZ]', (c not in 'az') or not IGNORECASE) for c in string.ascii_uppercase: - check(c, '[az]', (c in 'AZ') and ignorecase) - check(c, '[!az]', (c not in 'AZ') or not ignorecase) + check(c, '[az]', (c in 'AZ') and IGNORECASE) + check(c, '[!az]', (c not in 'AZ') or not IGNORECASE) # Repeated same character. for c in tescases: check(c, '[aa]', c == 'a') @@ -120,8 +121,6 @@ def test_char_set(self): check('[!]', '[!]') def test_range(self): - ignorecase = os.path.normcase('ABC') == os.path.normcase('abc') - normsep = os.path.normcase('\\') == os.path.normcase('/') check = self.check_match tescases = string.ascii_lowercase + string.digits + string.punctuation for c in tescases: @@ -131,11 +130,11 @@ def test_range(self): check(c, '[!b-dx-z]', c not in 'bcdxyz') # Case insensitive. for c in tescases: - check(c, '[B-D]', (c in 'bcd') and ignorecase) - check(c, '[!B-D]', (c not in 'bcd') or not ignorecase) + check(c, '[B-D]', (c in 'bcd') and IGNORECASE) + check(c, '[!B-D]', (c not in 'bcd') or not IGNORECASE) for c in string.ascii_uppercase: - check(c, '[b-d]', (c in 'BCD') and ignorecase) - check(c, '[!b-d]', (c not in 'BCD') or not ignorecase) + check(c, '[b-d]', (c in 'BCD') and IGNORECASE) + check(c, '[!b-d]', (c not in 'BCD') or not IGNORECASE) # Upper bound == lower bound. 
for c in tescases: check(c, '[b-b]', c == 'b') @@ -144,7 +143,7 @@ def test_range(self): check(c, '[!-#]', c not in '-#') check(c, '[!--.]', c not in '-.') check(c, '[^-`]', c in '^_`') - if not (normsep and c == '/'): + if not (NORMSEP and c == '/'): check(c, '[[-^]', c in r'[\]^') check(c, r'[\-^]', c in r'\]^') check(c, '[b-]', c in '-b') @@ -160,47 +159,45 @@ def test_range(self): check(c, '[d-bx-z]', c in 'xyz') check(c, '[!d-bx-z]', c not in 'xyz') check(c, '[d-b^-`]', c in '^_`') - if not (normsep and c == '/'): + if not (NORMSEP and c == '/'): check(c, '[d-b[-^]', c in r'[\]^') def test_sep_in_char_set(self): - normsep = os.path.normcase('\\') == os.path.normcase('/') check = self.check_match check('/', r'[/]') check('\\', r'[\]') - check('/', r'[\]', normsep) - check('\\', r'[/]', normsep) + check('/', r'[\]', NORMSEP) + check('\\', r'[/]', NORMSEP) check('[/]', r'[/]', False) check(r'[\\]', r'[/]', False) check('\\', r'[\t]') - check('/', r'[\t]', normsep) + check('/', r'[\t]', NORMSEP) check('t', r'[\t]') check('\t', r'[\t]', False) def test_sep_in_range(self): - normsep = os.path.normcase('\\') == os.path.normcase('/') check = self.check_match - check('a/b', 'a[.-0]b', not normsep) + check('a/b', 'a[.-0]b', not NORMSEP) check('a\\b', 'a[.-0]b', False) - check('a\\b', 'a[Z-^]b', not normsep) + check('a\\b', 'a[Z-^]b', not NORMSEP) check('a/b', 'a[Z-^]b', False) - check('a/b', 'a[/-0]b', not normsep) + check('a/b', 'a[/-0]b', not NORMSEP) check(r'a\b', 'a[/-0]b', False) check('a[/-0]b', 'a[/-0]b', False) check(r'a[\-0]b', 'a[/-0]b', False) check('a/b', 'a[.-/]b') - check(r'a\b', 'a[.-/]b', normsep) + check(r'a\b', 'a[.-/]b', NORMSEP) check('a[.-/]b', 'a[.-/]b', False) check(r'a[.-\]b', 'a[.-/]b', False) check(r'a\b', r'a[\-^]b') - check('a/b', r'a[\-^]b', normsep) + check('a/b', r'a[\-^]b', NORMSEP) check(r'a[\-^]b', r'a[\-^]b', False) check('a[/-^]b', r'a[\-^]b', False) - check(r'a\b', r'a[Z-\]b', not normsep) + check(r'a\b', r'a[Z-\]b', not NORMSEP) 
check('a/b', r'a[Z-\]b', False) check(r'a[Z-\]b', r'a[Z-\]b', False) check('a[Z-/]b', r'a[Z-\]b', False) @@ -332,18 +329,41 @@ def test_mix_bytes_str(self): self.assertRaises(TypeError, filter, [b'test'], '*') def test_case(self): - ignorecase = os.path.normcase('P') == os.path.normcase('p') self.assertEqual(filter(['Test.py', 'Test.rb', 'Test.PL'], '*.p*'), - ['Test.py', 'Test.PL'] if ignorecase else ['Test.py']) + ['Test.py', 'Test.PL'] if IGNORECASE else ['Test.py']) self.assertEqual(filter(['Test.py', 'Test.rb', 'Test.PL'], '*.P*'), - ['Test.py', 'Test.PL'] if ignorecase else ['Test.PL']) + ['Test.py', 'Test.PL'] if IGNORECASE else ['Test.PL']) def test_sep(self): - normsep = os.path.normcase('\\') == os.path.normcase('/') self.assertEqual(filter(['usr/bin', 'usr', 'usr\\lib'], 'usr/*'), - ['usr/bin', 'usr\\lib'] if normsep else ['usr/bin']) + ['usr/bin', 'usr\\lib'] if NORMSEP else ['usr/bin']) self.assertEqual(filter(['usr/bin', 'usr', 'usr\\lib'], 'usr\\*'), - ['usr/bin', 'usr\\lib'] if normsep else ['usr\\lib']) + ['usr/bin', 'usr\\lib'] if NORMSEP else ['usr\\lib']) + + +class FilterFalseTestCase(unittest.TestCase): + + def test_filterfalse(self): + actual = filterfalse(['Python', 'Ruby', 'Perl', 'Tcl'], 'P*') + self.assertListEqual(actual, ['Ruby', 'Tcl']) + actual = filterfalse([b'Python', b'Ruby', b'Perl', b'Tcl'], b'P*') + self.assertListEqual(actual, [b'Ruby', b'Tcl']) + + def test_mix_bytes_str(self): + self.assertRaises(TypeError, filterfalse, ['test'], b'*') + self.assertRaises(TypeError, filterfalse, [b'test'], '*') + + def test_case(self): + self.assertEqual(filterfalse(['Test.py', 'Test.rb', 'Test.PL'], '*.p*'), + ['Test.rb'] if IGNORECASE else ['Test.rb', 'Test.PL']) + self.assertEqual(filterfalse(['Test.py', 'Test.rb', 'Test.PL'], '*.P*'), + ['Test.rb'] if IGNORECASE else ['Test.py', 'Test.rb',]) + + def test_sep(self): + self.assertEqual(filterfalse(['usr/bin', 'usr', 'usr\\lib'], 'usr/*'), + ['usr'] if NORMSEP else ['usr', 'usr\\lib']) + 
self.assertEqual(filterfalse(['usr/bin', 'usr', 'usr\\lib'], 'usr\\*'), + ['usr'] if NORMSEP else ['usr/bin', 'usr']) if __name__ == "__main__": diff --git a/Misc/NEWS.d/next/Library/2024-06-30-17-00-00.gh-issue-74598.1gVy_8.rst b/Misc/NEWS.d/next/Library/2024-06-30-17-00-00.gh-issue-74598.1gVy_8.rst new file mode 100644 index 00000000000000..3e0d052a58219e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-30-17-00-00.gh-issue-74598.1gVy_8.rst @@ -0,0 +1,2 @@ +Add :func:`fnmatch.filterfalse` for excluding names matching a pattern. +Patch by Bénédikt Tran. _______________________________________________ Python-checkins mailing list -- python-checkins@python.org To unsubscribe send an email to python-checkins-le...@python.org https://mail.python.org/mailman3/lists/python-checkins.python.org/ Member address: arch...@mail-archive.com