https://github.com/python/cpython/commit/ac56f8cc8d36ed65228d7eaa245569f66ad16d2b commit: ac56f8cc8d36ed65228d7eaa245569f66ad16d2b branch: main author: Serhiy Storchaka <storch...@gmail.com> committer: serhiy-storchaka <storch...@gmail.com> date: 2025-05-03T07:54:33Z summary:
gh-133306: Support \z as a synonym for \Z in regular expressions (GH-133314) \Z was an error inherited from PCRE 0.95. It was fixed in PCRE 2.0. In other engines, \Z means not “anchor at string end”, but “anchor before optional newline at string end”. \z means “anchor at string end” in most RE engines. files: A Misc/NEWS.d/next/Library/2025-05-02-21-35-03.gh-issue-133306.-vBye5.rst M Doc/howto/regex.rst M Doc/library/re.rst M Doc/whatsnew/3.14.rst M Lib/re/__init__.py M Lib/re/_parser.py M Lib/test/re_tests.py M Lib/test/test_re.py diff --git a/Doc/howto/regex.rst b/Doc/howto/regex.rst index 5e2f9a9d1837fe..e543f6d5657d79 100644 --- a/Doc/howto/regex.rst +++ b/Doc/howto/regex.rst @@ -738,9 +738,12 @@ given location, they can obviously be matched an infinite number of times. different: ``\A`` still matches only at the beginning of the string, but ``^`` may match at any location inside the string that follows a newline character. -``\Z`` +``\z`` Matches only at the end of the string. +``\Z`` + The same as ``\z``. For compatibility with old Python versions. + ``\b`` Word boundary. This is a zero-width assertion that matches only at the beginning or end of a word. A word is defined as a sequence of alphanumeric diff --git a/Doc/library/re.rst b/Doc/library/re.rst index a91bac53fb4e75..0ee2d68bcbe006 100644 --- a/Doc/library/re.rst +++ b/Doc/library/re.rst @@ -266,7 +266,7 @@ The special characters are: not a word boundary as outside a set, and numeric escapes such as ``\1`` are always octal escapes, not group references. Special sequences which do not match a single character such as ``\A`` - and ``\Z`` are not allowed. + and ``\z`` are not allowed. .. index:: single: ^ (caret); in regular expressions @@ -661,11 +661,17 @@ character ``'$'``. matches characters which are neither alphanumeric in the current locale nor the underscore. -.. index:: single: \Z; in regular expressions +.. 
index:: single: \z; in regular expressions + single: \Z; in regular expressions -``\Z`` +``\z`` Matches only at the end of the string. + .. versionadded:: next + +``\Z`` + The same as ``\z``. For compatibility with old Python versions. + .. index:: single: \a; in regular expressions single: \b; in regular expressions diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 2f8b652d47e428..6eb12f1c6f00df 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -624,6 +624,11 @@ Other language changes ASCII :class:`bytes` and :term:`bytes-like objects <bytes-like object>`. (Contributed by Daniel Pope in :gh:`129349`.) +* Support ``\z`` as a synonym for ``\Z`` in :mod:`regular expressions <re>`. + It is interpreted unambiguously in many other regular expression engines, + unlike ``\Z``, which has subtly different behavior. + (Contributed by Serhiy Storchaka in :gh:`133306`.) + * ``\B`` in :mod:`regular expression <re>` now matches empty input string. Now it is always the opposite of ``\b``. (Contributed by Serhiy Storchaka in :gh:`124130`.) diff --git a/Lib/re/__init__.py b/Lib/re/__init__.py index 7e8abbf6ffe155..af2808a77da691 100644 --- a/Lib/re/__init__.py +++ b/Lib/re/__init__.py @@ -61,7 +61,7 @@ resulting RE will match the second character. \number Matches the contents of the group of the same number. \A Matches only at the start of the string. - \Z Matches only at the end of the string. + \z Matches only at the end of the string. \b Matches the empty string, but only at the start or end of a word. \B Matches the empty string, but not at the start or end of a word. 
\d Matches any decimal digit; equivalent to the set [0-9] in diff --git a/Lib/re/_parser.py b/Lib/re/_parser.py index 0990255b22c219..35ab7ede2a75a9 100644 --- a/Lib/re/_parser.py +++ b/Lib/re/_parser.py @@ -49,7 +49,8 @@ r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]), r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]), r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]), - r"\Z": (AT, AT_END_STRING), # end of string + r"\z": (AT, AT_END_STRING), # end of string + r"\Z": (AT, AT_END_STRING), # end of string (obsolete) } FLAGS = { diff --git a/Lib/test/re_tests.py b/Lib/test/re_tests.py index 85b026736caac8..e50f5d52bbdec0 100755 --- a/Lib/test/re_tests.py +++ b/Lib/test/re_tests.py @@ -531,7 +531,7 @@ (r'a[ ]*?\ (\d+).*', 'a 10', SUCCEED, 'found', 'a 10'), (r'a[ ]*?\ (\d+).*', 'a 10', SUCCEED, 'found', 'a 10'), # bug 127259: \Z shouldn't depend on multiline mode - (r'(?ms).*?x\s*\Z(.*)','xx\nx\n', SUCCEED, 'g1', ''), + (r'(?ms).*?x\s*\z(.*)','xx\nx\n', SUCCEED, 'g1', ''), # bug 128899: uppercase literals under the ignorecase flag (r'(?i)M+', 'MMM', SUCCEED, 'found', 'MMM'), (r'(?i)m+', 'MMM', SUCCEED, 'found', 'MMM'), diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index cf8525ed901ad3..f79a6149078996 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -619,6 +619,7 @@ def test_re_fullmatch(self): self.assertEqual(re.fullmatch(r"a.*?b", "axxb").span(), (0, 4)) self.assertIsNone(re.fullmatch(r"a+", "ab")) self.assertIsNone(re.fullmatch(r"abc$", "abc\n")) + self.assertIsNone(re.fullmatch(r"abc\z", "abc\n")) self.assertIsNone(re.fullmatch(r"abc\Z", "abc\n")) self.assertIsNone(re.fullmatch(r"(?m)abc$", "abc\n")) self.assertEqual(re.fullmatch(r"ab(?=c)cd", "abcd").span(), (0, 4)) @@ -802,6 +803,8 @@ def test_special_escapes(self): self.assertEqual(re.search(r"\B(b.)\B", "abc bcd bc abxd", re.ASCII).group(1), "bx") self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc") + self.assertEqual(re.search(r"^\Aabc\z$", "abc", re.M).group(0), "abc") + 
self.assertIsNone(re.search(r"^\Aabc\z$", "\nabc\n", re.M)) self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc") self.assertIsNone(re.search(r"^\Aabc\Z$", "\nabc\n", re.M)) self.assertEqual(re.search(br"\b(b.)\b", @@ -813,6 +816,8 @@ def test_special_escapes(self): self.assertEqual(re.search(br"\B(b.)\B", b"abc bcd bc abxd", re.LOCALE).group(1), b"bx") self.assertEqual(re.search(br"^abc$", b"\nabc\n", re.M).group(0), b"abc") + self.assertEqual(re.search(br"^\Aabc\z$", b"abc", re.M).group(0), b"abc") + self.assertIsNone(re.search(br"^\Aabc\z$", b"\nabc\n", re.M)) self.assertEqual(re.search(br"^\Aabc\Z$", b"abc", re.M).group(0), b"abc") self.assertIsNone(re.search(br"^\Aabc\Z$", b"\nabc\n", re.M)) self.assertEqual(re.search(r"\d\D\w\W\s\S", @@ -836,7 +841,7 @@ def test_other_escapes(self): self.assertEqual(re.match(r"[\^a]+", 'a^').group(), 'a^') self.assertIsNone(re.match(r"[\^a]+", 'b')) re.purge() # for warnings - for c in 'ceghijklmopqyzCEFGHIJKLMNOPQRTVXY': + for c in 'ceghijklmopqyCEFGHIJKLMNOPQRTVXY': with self.subTest(c): self.assertRaises(re.PatternError, re.compile, '\\%c' % c) for c in 'ceghijklmopqyzABCEFGHIJKLMNOPQRTVXYZ': @@ -2608,8 +2613,8 @@ def test_findall_atomic_grouping(self): self.assertEqual(re.findall(r'(?>(?:ab){1,3})', 'ababc'), ['abab']) def test_bug_gh91616(self): - self.assertTrue(re.fullmatch(r'(?s:(?>.*?\.).*)\Z', "a.txt")) # reproducer - self.assertTrue(re.fullmatch(r'(?s:(?=(?P<g0>.*?\.))(?P=g0).*)\Z', "a.txt")) + self.assertTrue(re.fullmatch(r'(?s:(?>.*?\.).*)\z', "a.txt")) # reproducer + self.assertTrue(re.fullmatch(r'(?s:(?=(?P<g0>.*?\.))(?P=g0).*)\z', "a.txt")) def test_bug_gh100061(self): # gh-100061 diff --git a/Misc/NEWS.d/next/Library/2025-05-02-21-35-03.gh-issue-133306.-vBye5.rst b/Misc/NEWS.d/next/Library/2025-05-02-21-35-03.gh-issue-133306.-vBye5.rst new file mode 100644 index 00000000000000..d0973af5ffc352 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-02-21-35-03.gh-issue-133306.-vBye5.rst @@ -0,0 
+1 @@ +Support ``\z`` as a synonym for ``\Z`` in :mod:`regular expressions <re>`. _______________________________________________ Python-checkins mailing list -- python-checkins@python.org To unsubscribe send an email to python-checkins-leave@python.org https://mail.python.org/mailman3/lists/python-checkins.python.org/ Member address: arch...@mail-archive.com