https://github.com/python/cpython/commit/bc9e63dd9d2931771415cca1b0ed774471d523c0
commit: bc9e63dd9d2931771415cca1b0ed774471d523c0
branch: main
author: Alper <[email protected]>
committer: colesbury <[email protected]>
date: 2025-11-26T15:40:45-05:00
summary:
gh-116738: Fix thread-safety issue in re module for free threading (gh-141923)
Added atomic operations to `scanner_begin()` and `scanner_end()` to prevent
race conditions on the `executing` flag in free-threaded builds. Also added
tests for concurrent usage of the `re` module.
Without the atomic operations, `test_scanner_concurrent_access()` triggers
`assert(self->executing)` failures, or a thread sanitizer run emits errors.
files:
A Lib/test/test_free_threading/test_re.py
A Misc/NEWS.d/next/Core_and_Builtins/2025-11-25-13-13-34.gh-issue-116738.MnZRdV.rst
M Include/internal/pycore_pyatomic_ft_wrappers.h
M Modules/_sre/sre.c
diff --git a/Include/internal/pycore_pyatomic_ft_wrappers.h b/Include/internal/pycore_pyatomic_ft_wrappers.h
index c31c33657002ec..2ae0185226f847 100644
--- a/Include/internal/pycore_pyatomic_ft_wrappers.h
+++ b/Include/internal/pycore_pyatomic_ft_wrappers.h
@@ -77,6 +77,10 @@ extern "C" {
_Py_atomic_store_ushort_relaxed(&value, new_value)
#define FT_ATOMIC_LOAD_USHORT_RELAXED(value) \
_Py_atomic_load_ushort_relaxed(&value)
+#define FT_ATOMIC_LOAD_INT(value) \
+ _Py_atomic_load_int(&value)
+#define FT_ATOMIC_STORE_INT(value, new_value) \
+ _Py_atomic_store_int(&value, new_value)
#define FT_ATOMIC_STORE_INT_RELAXED(value, new_value) \
_Py_atomic_store_int_relaxed(&value, new_value)
#define FT_ATOMIC_LOAD_INT_RELAXED(value) \
@@ -144,6 +148,8 @@ extern "C" {
#define FT_ATOMIC_STORE_SHORT_RELAXED(value, new_value) value = new_value
#define FT_ATOMIC_LOAD_USHORT_RELAXED(value) value
#define FT_ATOMIC_STORE_USHORT_RELAXED(value, new_value) value = new_value
+#define FT_ATOMIC_LOAD_INT(value) value
+#define FT_ATOMIC_STORE_INT(value, new_value) value = new_value
#define FT_ATOMIC_LOAD_INT_RELAXED(value) value
#define FT_ATOMIC_STORE_INT_RELAXED(value, new_value) value = new_value
#define FT_ATOMIC_LOAD_UINT_RELAXED(value) value
diff --git a/Lib/test/test_free_threading/test_re.py b/Lib/test/test_free_threading/test_re.py
new file mode 100644
index 00000000000000..56f25045d1bf8e
--- /dev/null
+++ b/Lib/test/test_free_threading/test_re.py
@@ -0,0 +1,62 @@
+import re
+import unittest
+
+from test.support import threading_helper
+from test.support.threading_helper import run_concurrently
+
+
+NTHREADS = 10
+
+
+@threading_helper.requires_working_threading()
+class TestRe(unittest.TestCase):
+ def test_pattern_sub(self):
+ """Pattern substitution should work across threads"""
+ pattern = re.compile(r"\w+@\w+\.\w+")
+ text = "e-mail: [email protected] or [email protected]. " * 5
+ results = []
+
+ def worker():
+ substituted = pattern.sub("(redacted)", text)
+ results.append(substituted.count("(redacted)"))
+
+ run_concurrently(worker_func=worker, nthreads=NTHREADS)
+ self.assertEqual(results, [2 * 5] * NTHREADS)
+
+ def test_pattern_search(self):
+ """Pattern search should work across threads."""
+ emails = ["[email protected]", "[email protected]"] * 10
+ pattern = re.compile(r"\w+@\w+\.\w+")
+ results = []
+
+ def worker():
+ matches = [pattern.search(e).group() for e in emails]
+ results.append(len(matches))
+
+ run_concurrently(worker_func=worker, nthreads=NTHREADS)
+ self.assertEqual(results, [2 * 10] * NTHREADS)
+
+ def test_scanner_concurrent_access(self):
+ """Shared scanner should reject concurrent access."""
+ pattern = re.compile(r"\w+")
+ scanner = pattern.scanner("word " * 10)
+
+ def worker():
+ for _ in range(100):
+ try:
+ scanner.search()
+ except ValueError as e:
+ if "already executing" in str(e):
+ pass
+ else:
+ raise
+
+ run_concurrently(worker_func=worker, nthreads=NTHREADS)
+ # This test has no assertions. Its purpose is to catch crashes and
+ # enable thread sanitizer to detect race conditions. While "already
+ # executing" errors are very likely, they're not guaranteed due to
+ # non-deterministic thread scheduling, so we can't assert errors > 0.
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-25-13-13-34.gh-issue-116738.MnZRdV.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-25-13-13-34.gh-issue-116738.MnZRdV.rst
new file mode 100644
index 00000000000000..151f8968292a61
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-25-13-13-34.gh-issue-116738.MnZRdV.rst
@@ -0,0 +1,2 @@
+Fix thread safety issue with :mod:`re` scanner objects in free-threaded
+builds.
diff --git a/Modules/_sre/sre.c b/Modules/_sre/sre.c
index 4e97101b699876..59ff9078e6cff4 100644
--- a/Modules/_sre/sre.c
+++ b/Modules/_sre/sre.c
@@ -2841,20 +2841,25 @@ scanner_dealloc(PyObject *self)
static int
scanner_begin(ScannerObject* self)
{
- if (self->executing) {
+#ifdef Py_GIL_DISABLED
+ int was_executing = _Py_atomic_exchange_int(&self->executing, 1);
+#else
+ int was_executing = self->executing;
+ self->executing = 1;
+#endif
+ if (was_executing) {
PyErr_SetString(PyExc_ValueError,
"regular expression scanner already executing");
return 0;
}
- self->executing = 1;
return 1;
}
static void
scanner_end(ScannerObject* self)
{
- assert(self->executing);
- self->executing = 0;
+ assert(FT_ATOMIC_LOAD_INT_RELAXED(self->executing));
+ FT_ATOMIC_STORE_INT(self->executing, 0);
}
/*[clinic input]
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]