https://github.com/python/cpython/commit/a3711d1541c1b7987941b41d2247f87dae347117
commit: a3711d1541c1b7987941b41d2247f87dae347117
branch: main
author: Serhiy Storchaka <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2025-01-02T12:11:21Z
summary:
gh-124130: Fix a bug in matching regular expression \B in empty string
(GH-127007)
files:
A Misc/NEWS.d/next/Library/2024-11-19-10-46-57.gh-issue-124130.OZ_vR5.rst
M Doc/library/re.rst
M Doc/whatsnew/3.14.rst
M Lib/test/test_re.py
M Modules/_sre/sre_lib.h
diff --git a/Doc/library/re.rst b/Doc/library/re.rst
index 9db6f1da3be4db..29387a429b844c 100644
--- a/Doc/library/re.rst
+++ b/Doc/library/re.rst
@@ -572,11 +572,8 @@ character ``'$'``.
Word boundaries are determined by the current locale
if the :py:const:`~re.LOCALE` flag is used.
- .. note::
-
- Note that ``\B`` does not match an empty string, which differs from
- RE implementations in other programming languages such as Perl.
- This behavior is kept for compatibility reasons.
+ .. versionchanged:: next
+ ``\B`` now matches an empty input string.
.. index:: single: \d; in regular expressions
diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst
index 4b3f1b2e8eed42..61f5ffdb6c89d1 100644
--- a/Doc/whatsnew/3.14.rst
+++ b/Doc/whatsnew/3.14.rst
@@ -245,6 +245,10 @@ Other language changes
making it a :term:`generic type`.
(Contributed by Brian Schubert in :gh:`126012`.)
+* ``\B`` in :mod:`regular expressions <re>` now matches an empty input string.
+ It is now always the opposite of ``\b``.
+ (Contributed by Serhiy Storchaka in :gh:`124130`.)
+
* iOS and macOS apps can now be configured to redirect ``stdout`` and
``stderr`` content to the system log. (Contributed by Russell Keith-Magee in
:gh:`127592`.)
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 0d3599be87f228..5538de60b2a03a 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -978,18 +978,15 @@ def test_word_boundaries(self):
self.assertIsNone(re.fullmatch(br".+\B", b"abc", re.LOCALE))
self.assertIsNone(re.fullmatch(r".+\B", "ьюя"))
self.assertTrue(re.fullmatch(r".+\B", "ьюя", re.ASCII))
- # However, an empty string contains no word boundaries, and also no
- # non-boundaries.
+ # However, an empty string contains no word boundaries.
self.assertIsNone(re.search(r"\b", ""))
self.assertIsNone(re.search(r"\b", "", re.ASCII))
self.assertIsNone(re.search(br"\b", b""))
self.assertIsNone(re.search(br"\b", b"", re.LOCALE))
- # This one is questionable and different from the perlre behaviour,
- # but describes current behavior.
- self.assertIsNone(re.search(r"\B", ""))
- self.assertIsNone(re.search(r"\B", "", re.ASCII))
- self.assertIsNone(re.search(br"\B", b""))
- self.assertIsNone(re.search(br"\B", b"", re.LOCALE))
+ self.assertTrue(re.search(r"\B", ""))
+ self.assertTrue(re.search(r"\B", "", re.ASCII))
+ self.assertTrue(re.search(br"\B", b""))
+ self.assertTrue(re.search(br"\B", b"", re.LOCALE))
# A single word-character string has two boundaries, but no
# non-boundary gaps.
self.assertEqual(len(re.findall(r"\b", "a")), 2)
diff --git a/Misc/NEWS.d/next/Library/2024-11-19-10-46-57.gh-issue-124130.OZ_vR5.rst b/Misc/NEWS.d/next/Library/2024-11-19-10-46-57.gh-issue-124130.OZ_vR5.rst
new file mode 100644
index 00000000000000..a1d4fc8ff4c22f
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-11-19-10-46-57.gh-issue-124130.OZ_vR5.rst
@@ -0,0 +1,4 @@
+Fix a bug in matching the regular expression ``\B`` against an empty input string.
+It is now always the opposite of ``\b``.
+To get the old behavior, use ``(?!\A\Z)\B``.
+To get the new behavior in older Python versions, use ``(?!\b)``.
diff --git a/Modules/_sre/sre_lib.h b/Modules/_sre/sre_lib.h
index af4bfc56083bcb..df377905bfae0d 100644
--- a/Modules/_sre/sre_lib.h
+++ b/Modules/_sre/sre_lib.h
@@ -42,8 +42,6 @@ SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
return ((void*) ptr == state->end);
case SRE_AT_BOUNDARY:
- if (state->beginning == state->end)
- return 0;
thatp = ((void*) ptr > state->beginning) ?
SRE_IS_WORD((int) ptr[-1]) : 0;
thisp = ((void*) ptr < state->end) ?
@@ -51,8 +49,6 @@ SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
return thisp != thatp;
case SRE_AT_NON_BOUNDARY:
- if (state->beginning == state->end)
- return 0;
thatp = ((void*) ptr > state->beginning) ?
SRE_IS_WORD((int) ptr[-1]) : 0;
thisp = ((void*) ptr < state->end) ?
@@ -60,8 +56,6 @@ SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
return thisp == thatp;
case SRE_AT_LOC_BOUNDARY:
- if (state->beginning == state->end)
- return 0;
thatp = ((void*) ptr > state->beginning) ?
SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
thisp = ((void*) ptr < state->end) ?
@@ -69,8 +63,6 @@ SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
return thisp != thatp;
case SRE_AT_LOC_NON_BOUNDARY:
- if (state->beginning == state->end)
- return 0;
thatp = ((void*) ptr > state->beginning) ?
SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
thisp = ((void*) ptr < state->end) ?
@@ -78,8 +70,6 @@ SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
return thisp == thatp;
case SRE_AT_UNI_BOUNDARY:
- if (state->beginning == state->end)
- return 0;
thatp = ((void*) ptr > state->beginning) ?
SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
thisp = ((void*) ptr < state->end) ?
@@ -87,8 +77,6 @@ SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
return thisp != thatp;
case SRE_AT_UNI_NON_BOUNDARY:
- if (state->beginning == state->end)
- return 0;
thatp = ((void*) ptr > state->beginning) ?
SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
thisp = ((void*) ptr < state->end) ?
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]