https://github.com/python/cpython/commit/f47e928574187ae97c778e326739b3aa0c7dc765 commit: f47e928574187ae97c778e326739b3aa0c7dc765 branch: 3.14 author: Miss Islington (bot) <[email protected]> committer: colesbury <[email protected]> date: 2025-11-26T21:08:39Z summary:
[3.14] gh-116738: Fix thread-safety issue in re module for free threading (gh-141923) (gh-141990) Added atomic operations to `scanner_begin()` and `scanner_end()` to prevent race conditions on the `executing` flag in free-threaded builds. Also added tests for concurrent usage of the `re` module. Without the atomic operations, `test_scanner_concurrent_access()` triggers `assert(self->executing)` failures, or a thread sanitizer run emits errors. (cherry picked from commit bc9e63dd9d2931771415cca1b0ed774471d523c0) Co-authored-by: Alper <[email protected]> files: A Lib/test/test_free_threading/test_re.py A Misc/NEWS.d/next/Core_and_Builtins/2025-11-25-13-13-34.gh-issue-116738.MnZRdV.rst M Include/internal/pycore_pyatomic_ft_wrappers.h M Modules/_sre/sre.c diff --git a/Include/internal/pycore_pyatomic_ft_wrappers.h b/Include/internal/pycore_pyatomic_ft_wrappers.h index 3e41e2fd1569ca..223ee951e4bcc2 100644 --- a/Include/internal/pycore_pyatomic_ft_wrappers.h +++ b/Include/internal/pycore_pyatomic_ft_wrappers.h @@ -77,6 +77,10 @@ extern "C" { _Py_atomic_store_ushort_relaxed(&value, new_value) #define FT_ATOMIC_LOAD_USHORT_RELAXED(value) \ _Py_atomic_load_ushort_relaxed(&value) +#define FT_ATOMIC_LOAD_INT(value) \ + _Py_atomic_load_int(&value) +#define FT_ATOMIC_STORE_INT(value, new_value) \ + _Py_atomic_store_int(&value, new_value) #define FT_ATOMIC_STORE_INT_RELAXED(value, new_value) \ _Py_atomic_store_int_relaxed(&value, new_value) #define FT_ATOMIC_LOAD_INT_RELAXED(value) \ @@ -142,6 +146,8 @@ extern "C" { #define FT_ATOMIC_STORE_SHORT_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_LOAD_USHORT_RELAXED(value) value #define FT_ATOMIC_STORE_USHORT_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_LOAD_INT(value) value +#define FT_ATOMIC_STORE_INT(value, new_value) value = new_value #define FT_ATOMIC_LOAD_INT_RELAXED(value) value #define FT_ATOMIC_STORE_INT_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_LOAD_UINT_RELAXED(value) value diff --git a/Lib/test/test_free_threading/test_re.py b/Lib/test/test_free_threading/test_re.py new file mode 100644 index 00000000000000..56f25045d1bf8e --- /dev/null +++ b/Lib/test/test_free_threading/test_re.py @@ -0,0 +1,62 @@ +import re +import unittest + +from test.support import threading_helper +from test.support.threading_helper import run_concurrently + + +NTHREADS = 10 + + +@threading_helper.requires_working_threading() +class TestRe(unittest.TestCase): + def test_pattern_sub(self): + """Pattern substitution should work across threads""" + pattern = re.compile(r"\w+@\w+\.\w+") + text = "e-mail: [email protected] or [email protected]. " * 5 + results = [] + + def worker(): + substituted = pattern.sub("(redacted)", text) + results.append(substituted.count("(redacted)")) + + run_concurrently(worker_func=worker, nthreads=NTHREADS) + self.assertEqual(results, [2 * 5] * NTHREADS) + + def test_pattern_search(self): + """Pattern search should work across threads.""" + emails = ["[email protected]", "[email protected]"] * 10 + pattern = re.compile(r"\w+@\w+\.\w+") + results = [] + + def worker(): + matches = [pattern.search(e).group() for e in emails] + results.append(len(matches)) + + run_concurrently(worker_func=worker, nthreads=NTHREADS) + self.assertEqual(results, [2 * 10] * NTHREADS) + + def test_scanner_concurrent_access(self): + """Shared scanner should reject concurrent access.""" + pattern = re.compile(r"\w+") + scanner = pattern.scanner("word " * 10) + + def worker(): + for _ in range(100): + try: + scanner.search() + except ValueError as e: + if "already executing" in str(e): + pass + else: + raise + + run_concurrently(worker_func=worker, nthreads=NTHREADS) + # This test has no assertions. Its purpose is to catch crashes and + # enable thread sanitizer to detect race conditions. While "already + # executing" errors are very likely, they're not guaranteed due to + # non-deterministic thread scheduling, so we can't assert errors > 0. + + +if __name__ == "__main__": + unittest.main() diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-25-13-13-34.gh-issue-116738.MnZRdV.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-25-13-13-34.gh-issue-116738.MnZRdV.rst new file mode 100644 index 00000000000000..151f8968292a61 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-25-13-13-34.gh-issue-116738.MnZRdV.rst @@ -0,0 +1,2 @@ +Fix thread safety issue with :mod:`re` scanner objects in free-threaded +builds. diff --git a/Modules/_sre/sre.c b/Modules/_sre/sre.c index 2761c76a3792f8..249d6fffc32cc2 100644 --- a/Modules/_sre/sre.c +++ b/Modules/_sre/sre.c @@ -2835,20 +2835,25 @@ scanner_dealloc(PyObject *self) static int scanner_begin(ScannerObject* self) { - if (self->executing) { +#ifdef Py_GIL_DISABLED + int was_executing = _Py_atomic_exchange_int(&self->executing, 1); +#else + int was_executing = self->executing; + self->executing = 1; +#endif + if (was_executing) { PyErr_SetString(PyExc_ValueError, "regular expression scanner already executing"); return 0; } - self->executing = 1; return 1; } static void scanner_end(ScannerObject* self) { - assert(self->executing); - self->executing = 0; + assert(FT_ATOMIC_LOAD_INT_RELAXED(self->executing)); + FT_ATOMIC_STORE_INT(self->executing, 0); } /*[clinic input] _______________________________________________ Python-checkins mailing list -- [email protected] To unsubscribe send an email to [email protected] https://mail.python.org/mailman3//lists/python-checkins.python.org Member address: [email protected]
