https://github.com/python/cpython/commit/3d4ac1a2c2b610f35a9e164878d67185e4a3546f commit: 3d4ac1a2c2b610f35a9e164878d67185e4a3546f branch: main author: Sam Gross <colesb...@gmail.com> committer: colesbury <colesb...@gmail.com> date: 2025-03-26T12:08:20-04:00 summary:
gh-123358: Use `_PyStackRef` in `LOAD_DEREF` (gh-130064) Concurrent accesses from multiple threads to the same `cell` object did not scale well in the free-threaded build. Use `_PyStackRef` and optimistically avoid locking to improve scaling. With the locks around cell reads gone, some of the free threading tests were prone to starvation: the readers were able to run in a tight loop and the writer threads weren't scheduled frequently enough to make timely progress. Adjust the tests to avoid this. Co-authored-by: Donghee Na <donghee...@python.org> files: M Include/internal/pycore_cell.h M Include/internal/pycore_opcode_metadata.h M Include/internal/pycore_uop_metadata.h M Lib/test/test_free_threading/test_dict.py M Lib/test/test_free_threading/test_func_annotations.py M Lib/test/test_free_threading/test_gc.py M Lib/test/test_free_threading/test_list.py M Lib/test/test_free_threading/test_monitoring.py M Lib/test/test_free_threading/test_type.py M Python/bytecodes.c M Python/executor_cases.c.h M Python/generated_cases.c.h diff --git a/Include/internal/pycore_cell.h b/Include/internal/pycore_cell.h index 27f67d57b2fb79..cef01e80514f4b 100644 --- a/Include/internal/pycore_cell.h +++ b/Include/internal/pycore_cell.h @@ -2,6 +2,8 @@ #define Py_INTERNAL_CELL_H #include "pycore_critical_section.h" +#include "pycore_object.h" +#include "pycore_stackref.h" #ifdef __cplusplus extern "C" { @@ -19,7 +21,7 @@ PyCell_SwapTakeRef(PyCellObject *cell, PyObject *value) PyObject *old_value; Py_BEGIN_CRITICAL_SECTION(cell); old_value = cell->ob_ref; - cell->ob_ref = value; + FT_ATOMIC_STORE_PTR_RELEASE(cell->ob_ref, value); Py_END_CRITICAL_SECTION(); return old_value; } @@ -37,11 +39,36 @@ PyCell_GetRef(PyCellObject *cell) { PyObject *res; Py_BEGIN_CRITICAL_SECTION(cell); +#ifdef Py_GIL_DISABLED + res = _Py_XNewRefWithLock(cell->ob_ref); +#else res = Py_XNewRef(cell->ob_ref); +#endif Py_END_CRITICAL_SECTION(); return res; } +static inline _PyStackRef +_PyCell_GetStackRef(PyCellObject *cell) +{ + PyObject *value; +#ifdef Py_GIL_DISABLED + value = _Py_atomic_load_ptr(&cell->ob_ref); + if (value == NULL) { + return PyStackRef_NULL; + } + _PyStackRef ref; + if (_Py_TryIncrefCompareStackRef(&cell->ob_ref, value, &ref)) { + return ref; + } +#endif + value = PyCell_GetRef(cell); + if (value == NULL) { + return PyStackRef_NULL; + } + return PyStackRef_FromPyObjectSteal(value); +} + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 096cd0b5e8db67..9c101302591872 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1195,7 +1195,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [LOAD_CONST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_CONST_FLAG }, [LOAD_CONST_IMMORTAL] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_CONST_FLAG }, [LOAD_CONST_MORTAL] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_CONST_FLAG }, - [LOAD_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [LOAD_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG }, [LOAD_FAST_AND_CLEAR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, [LOAD_FAST_CHECK] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 7c2c8715fb4862..59572fca3753d6 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -131,7 +131,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_MAKE_CELL] = HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_DELETE_DEREF] = HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_LOAD_FROM_DICT_OR_DEREF] = HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, - [_LOAD_DEREF] = HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_LOAD_DEREF] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_DEREF] = HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ESCAPES_FLAG, [_COPY_FREE_VARS] = HAS_ARG_FLAG, [_BUILD_STRING] = HAS_ARG_FLAG | HAS_ERROR_FLAG, diff --git a/Lib/test/test_free_threading/test_dict.py b/Lib/test/test_free_threading/test_dict.py index 4f605e0c51f0d5..476cc3178d843f 100644 --- a/Lib/test/test_free_threading/test_dict.py +++ b/Lib/test/test_free_threading/test_dict.py @@ -74,6 +74,7 @@ def writer_func(name): last = -1 while True: if CUR == last: + time.sleep(0.001) continue elif CUR == OBJECT_COUNT: break diff --git a/Lib/test/test_free_threading/test_func_annotations.py b/Lib/test/test_free_threading/test_func_annotations.py index b3e92956072c75..0fde0136d77c7f 100644 --- a/Lib/test/test_free_threading/test_func_annotations.py +++ b/Lib/test/test_free_threading/test_func_annotations.py @@ -27,13 +27,13 @@ def set_func_annotation(f, b): @unittest.skipUnless(Py_GIL_DISABLED, "Enable only in FT build") class TestFTFuncAnnotations(TestCase): - NUM_THREADS = 8 + NUM_THREADS = 4 def test_concurrent_read(self): def f(x: int) -> int: return x + 1 - for _ in range(100): + for _ in range(10): with concurrent.futures.ThreadPoolExecutor(max_workers=self.NUM_THREADS) as executor: b = Barrier(self.NUM_THREADS) futures = {executor.submit(get_func_annotation, f, b): i for i in range(self.NUM_THREADS)} @@ -54,7 +54,7 @@ def test_concurrent_write(self): def bar(x: int, y: float) -> float: return y ** x - for _ in range(100): + for _ in range(10): with concurrent.futures.ThreadPoolExecutor(max_workers=self.NUM_THREADS) as executor: b = Barrier(self.NUM_THREADS) futures = {executor.submit(set_func_annotation, bar, b): i for i in range(self.NUM_THREADS)} diff --git a/Lib/test/test_free_threading/test_gc.py b/Lib/test/test_free_threading/test_gc.py index 401067fe9c612c..3b83e0431efa6b 100644 --- a/Lib/test/test_free_threading/test_gc.py +++ b/Lib/test/test_free_threading/test_gc.py @@ -35,24 +35,30 @@ def mutator_thread(): pass def test_get_referrers(self): + NUM_GC = 2 + NUM_MUTATORS = 4 + + b = threading.Barrier(NUM_GC + NUM_MUTATORS) event = threading.Event() obj = MyObj() def gc_thread(): + b.wait() for i in range(100): o = gc.get_referrers(obj) event.set() def mutator_thread(): + b.wait() while not event.is_set(): d1 = { "key": obj } d2 = { "key": obj } d3 = { "key": obj } d4 = { "key": obj } - gcs = [Thread(target=gc_thread) for _ in range(2)] - mutators = [Thread(target=mutator_thread) for _ in range(4)] + gcs = [Thread(target=gc_thread) for _ in range(NUM_GC)] + mutators = [Thread(target=mutator_thread) for _ in range(NUM_MUTATORS)] with threading_helper.start_threads(gcs + mutators): pass diff --git a/Lib/test/test_free_threading/test_list.py b/Lib/test/test_free_threading/test_list.py index 2c9af65273a1c8..44c0ac74e02aa3 100644 --- a/Lib/test/test_free_threading/test_list.py +++ b/Lib/test/test_free_threading/test_list.py @@ -20,11 +20,14 @@ class TestList(TestCase): def test_racing_iter_append(self): l = [] - def writer_func(): + barrier = Barrier(NTHREAD + 1) + def writer_func(l): + barrier.wait() for i in range(OBJECT_COUNT): l.append(C(i + OBJECT_COUNT)) - def reader_func(): + def reader_func(l): + barrier.wait() while True: count = len(l) for i, x in enumerate(l): @@ -32,10 +35,10 @@ def reader_func(): if count == OBJECT_COUNT: break - writer = Thread(target=writer_func) + writer = Thread(target=writer_func, args=(l,)) readers = [] for x in range(NTHREAD): - reader = Thread(target=reader_func) + reader = Thread(target=reader_func, args=(l,)) readers.append(reader) reader.start() @@ -47,11 +50,14 @@ def reader_func(): def test_racing_iter_extend(self): l = [] + barrier = Barrier(NTHREAD + 1) def writer_func(): + barrier.wait() for i in range(OBJECT_COUNT): l.extend([C(i + OBJECT_COUNT)]) def reader_func(): + barrier.wait() while True: count = len(l) for i, x in enumerate(l): diff --git a/Lib/test/test_free_threading/test_monitoring.py b/Lib/test/test_free_threading/test_monitoring.py index 8fec01715531cb..a480e398722c33 100644 --- a/Lib/test/test_free_threading/test_monitoring.py +++ b/Lib/test/test_free_threading/test_monitoring.py @@ -8,7 +8,7 @@ from sys import monitoring from test.support import threading_helper -from threading import Thread, _PyRLock +from threading import Thread, _PyRLock, Barrier from unittest import TestCase @@ -194,7 +194,9 @@ def during_threads(self): @threading_helper.requires_working_threading() class MonitoringMisc(MonitoringTestMixin, TestCase): - def register_callback(self): + def register_callback(self, barrier): + barrier.wait() + def callback(*args): pass @@ -206,8 +208,9 @@ def callback(*args): def test_register_callback(self): self.refs = [] threads = [] - for i in range(50): - t = Thread(target=self.register_callback) + barrier = Barrier(5) + for i in range(5): + t = Thread(target=self.register_callback, args=(barrier,)) t.start() threads.append(t) diff --git a/Lib/test/test_free_threading/test_type.py b/Lib/test/test_free_threading/test_type.py index 53f6d778bbecbc..ae996e7db3c7fd 100644 --- a/Lib/test/test_free_threading/test_type.py +++ b/Lib/test/test_free_threading/test_type.py @@ -45,26 +45,20 @@ def test_attr_cache_consistency(self): class C: x = 0 - DONE = False def writer_func(): - for i in range(3000): + for _ in range(3000): C.x C.x C.x += 1 - nonlocal DONE - DONE = True def reader_func(): - while True: + for _ in range(3000): # We should always see a greater value read from the type than the # dictionary a = C.__dict__['x'] b = C.x self.assertGreaterEqual(b, a) - if DONE: - break - self.run_one(writer_func, reader_func) def test_attr_cache_consistency_subclass(self): @@ -74,26 +68,20 @@ class C: class D(C): pass - DONE = False def writer_func(): - for i in range(3000): + for _ in range(3000): D.x D.x C.x += 1 - nonlocal DONE - DONE = True def reader_func(): - while True: + for _ in range(3000): # We should always see a greater value read from the type than the # dictionary a = C.__dict__['x'] b = D.x self.assertGreaterEqual(b, a) - if DONE: - break - self.run_one(writer_func, reader_func) def test___class___modification(self): @@ -140,10 +128,18 @@ class ClassB(Base): def run_one(self, writer_func, reader_func): - writer = Thread(target=writer_func) + barrier = threading.Barrier(NTHREADS) + + def wrap_target(target): + def wrapper(): + barrier.wait() + target() + return wrapper + + writer = Thread(target=wrap_target(writer_func)) readers = [] - for x in range(30): - reader = Thread(target=reader_func) + for x in range(NTHREADS - 1): + reader = Thread(target=wrap_target(reader_func)) readers.append(reader) reader.start() diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 44e8ea2450800c..b6a482183b1d0b 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1822,12 +1822,11 @@ dummy_func( inst(LOAD_DEREF, ( -- value)) { PyCellObject *cell = (PyCellObject *)PyStackRef_AsPyObjectBorrow(GETLOCAL(oparg)); - PyObject *value_o = PyCell_GetRef(cell); - if (value_o == NULL) { + value = _PyCell_GetStackRef(cell); + if (PyStackRef_IsNull(value)) { _PyEval_FormatExcUnbound(tstate, _PyFrame_GetCode(frame), oparg); ERROR_IF(true, error); } - value = PyStackRef_FromPyObjectSteal(value_o); } inst(STORE_DEREF, (v --)) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 43f9a81d2c70f8..6d5f3dd1f6b935 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2479,14 +2479,18 @@ _PyStackRef value; oparg = CURRENT_OPARG(); PyCellObject *cell = (PyCellObject *)PyStackRef_AsPyObjectBorrow(GETLOCAL(oparg)); - PyObject *value_o = PyCell_GetRef(cell); - if (value_o == NULL) { + _PyFrame_SetStackPointer(frame, stack_pointer); + value = _PyCell_GetStackRef(cell); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (PyStackRef_IsNull(value)) { + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); _PyEval_FormatExcUnbound(tstate, _PyFrame_GetCode(frame), oparg); stack_pointer = _PyFrame_GetStackPointer(frame); JUMP_TO_ERROR(); } - value = PyStackRef_FromPyObjectSteal(value_o); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 493d99db2d3677..f6a538e98fe129 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -8822,14 +8822,18 @@ INSTRUCTION_STATS(LOAD_DEREF); _PyStackRef value; PyCellObject *cell = (PyCellObject *)PyStackRef_AsPyObjectBorrow(GETLOCAL(oparg)); - PyObject *value_o = PyCell_GetRef(cell); - if (value_o == NULL) { + _PyFrame_SetStackPointer(frame, stack_pointer); + value = _PyCell_GetStackRef(cell); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (PyStackRef_IsNull(value)) { + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); _PyEval_FormatExcUnbound(tstate, _PyFrame_GetCode(frame), oparg); stack_pointer = _PyFrame_GetStackPointer(frame); JUMP_TO_LABEL(error); } - value = PyStackRef_FromPyObjectSteal(value_o); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); _______________________________________________ Python-checkins mailing list -- python-checkins@python.org To unsubscribe send an email to python-checkins-le...@python.org https://mail.python.org/mailman3/lists/python-checkins.python.org/ Member address: arch...@mail-archive.com