https://github.com/python/cpython/commit/81ef1b73175378543534ebe1f148c22deb8239b3
commit: 81ef1b73175378543534ebe1f148c22deb8239b3
branch: main
author: Hai Zhu <[email protected]>
committer: markshannon <[email protected]>
date: 2026-03-16T15:58:18Z
summary:
gh-144888: Replace bloom filter linked lists with continuous arrays to
optimize executor invalidating performance (GH-145873)
files:
M Include/internal/pycore_interp_structs.h
M Include/internal/pycore_optimizer.h
M InternalDocs/jit.md
M Modules/_testinternalcapi.c
M Objects/funcobject.c
M Python/jit.c
M Python/optimizer.c
M Python/pylifecycle.c
M Python/pystate.c
diff --git a/Include/internal/pycore_interp_structs.h
b/Include/internal/pycore_interp_structs.h
index 776fb9575c2365..e2008f8303e21b 100644
--- a/Include/internal/pycore_interp_structs.h
+++ b/Include/internal/pycore_interp_structs.h
@@ -14,6 +14,7 @@ extern "C" {
#include "pycore_structs.h" // PyHamtObject
#include "pycore_tstate.h" // _PyThreadStateImpl
#include "pycore_typedefs.h" // _PyRuntimeState
+#include "pycore_uop.h" // _PyBloomFilter
#define CODE_MAX_WATCHERS 8
#define CONTEXT_MAX_WATCHERS 8
@@ -972,7 +973,10 @@ struct _is {
// Optimization configuration (thresholds and flags for JIT and
interpreter)
_PyOptimizationConfig opt_config;
- struct _PyExecutorObject *executor_list_head;
+ _PyBloomFilter *executor_blooms; // Contiguous bloom filter
array
+ struct _PyExecutorObject **executor_ptrs; // Corresponding executor
pointer array
+ size_t executor_count; // Number of valid executors
+ size_t executor_capacity; // Array capacity
struct _PyExecutorObject *executor_deletion_list_head;
struct _PyExecutorObject *cold_executor;
struct _PyExecutorObject *cold_dynamic_executor;
diff --git a/Include/internal/pycore_optimizer.h
b/Include/internal/pycore_optimizer.h
index c63f0167a0f64a..cea9fcce237301 100644
--- a/Include/internal/pycore_optimizer.h
+++ b/Include/internal/pycore_optimizer.h
@@ -128,8 +128,8 @@ typedef struct {
bool cold;
uint8_t pending_deletion;
int32_t index; // Index of ENTER_EXECUTOR (if code isn't NULL,
below).
- _PyBloomFilter bloom;
- _PyExecutorLinkListNode links;
+ int32_t bloom_array_idx; // Index in
interp->executor_blooms/executor_ptrs.
+ _PyExecutorLinkListNode links; // Used by deletion list.
PyCodeObject *code; // Weak (NULL if no corresponding ENTER_EXECUTOR).
} _PyVMData;
@@ -157,7 +157,7 @@ typedef struct _PyExecutorObject {
// Export for '_opcode' shared extension (JIT compiler).
PyAPI_FUNC(_PyExecutorObject*) _Py_GetExecutor(PyCodeObject *code, int offset);
-void _Py_ExecutorInit(_PyExecutorObject *, const _PyBloomFilter *);
+int _Py_ExecutorInit(_PyExecutorObject *, const _PyBloomFilter *);
void _Py_ExecutorDetach(_PyExecutorObject *);
void _Py_BloomFilter_Init(_PyBloomFilter *);
void _Py_BloomFilter_Add(_PyBloomFilter *bloom, void *obj);
diff --git a/InternalDocs/jit.md b/InternalDocs/jit.md
index 1740b22b85f77b..decfccad2d8d37 100644
--- a/InternalDocs/jit.md
+++ b/InternalDocs/jit.md
@@ -78,8 +78,8 @@ and execution returns to the adaptive interpreter.
## Invalidating Executors
In addition to being stored on the code object, each executor is also
-inserted into a list of all executors, which is stored in the interpreter
-state's `executor_list_head` field. This list is used when it is necessary
+inserted into contiguous arrays (`executor_blooms` and `executor_ptrs`)
+stored in the interpreter state. These arrays are used when it is necessary
to invalidate executors because values they used in their construction may
have changed.
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index aa5911ef2fb449..8c316b7c8ddda0 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -278,10 +278,8 @@ get_jit_code_ranges(PyObject *self, PyObject
*Py_UNUSED(args))
if (interp == NULL) {
return ranges;
}
- for (_PyExecutorObject *exec = interp->executor_list_head;
- exec != NULL;
- exec = exec->vm_data.links.next)
- {
+ for (size_t i = 0; i < interp->executor_count; i++) {
+ _PyExecutorObject *exec = interp->executor_ptrs[i];
if (exec->jit_code == NULL || exec->jit_size == 0) {
continue;
}
diff --git a/Objects/funcobject.c b/Objects/funcobject.c
index fc32826fb3a861..a7a3d9c78ef3d0 100644
--- a/Objects/funcobject.c
+++ b/Objects/funcobject.c
@@ -12,7 +12,6 @@
#include "pycore_setobject.h" // _PySet_NextEntry()
#include "pycore_stats.h"
#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS()
-#include "pycore_optimizer.h" // _Py_Executors_InvalidateDependency
static const char *
func_event_name(PyFunction_WatchEvent event) {
diff --git a/Python/jit.c b/Python/jit.c
index 3e0a0aa8bfcc81..4990c743224d3c 100644
--- a/Python/jit.c
+++ b/Python/jit.c
@@ -62,6 +62,23 @@ jit_error(const char *message)
static size_t _Py_jit_shim_size = 0;
+static int
+address_in_executor_array(_PyExecutorObject **ptrs, size_t count, uintptr_t
addr)
+{
+ for (size_t i = 0; i < count; i++) {
+ _PyExecutorObject *exec = ptrs[i];
+ if (exec->jit_code == NULL || exec->jit_size == 0) {
+ continue;
+ }
+ uintptr_t start = (uintptr_t)exec->jit_code;
+ uintptr_t end = start + exec->jit_size;
+ if (addr >= start && addr < end) {
+ return 1;
+ }
+ }
+ return 0;
+}
+
static int
address_in_executor_list(_PyExecutorObject *head, uintptr_t addr)
{
@@ -94,7 +111,7 @@ _PyJIT_AddressInJitCode(PyInterpreterState *interp,
uintptr_t addr)
return 1;
}
}
- if (address_in_executor_list(interp->executor_list_head, addr)) {
+ if (address_in_executor_array(interp->executor_ptrs,
interp->executor_count, addr)) {
return 1;
}
if (address_in_executor_list(interp->executor_deletion_list_head, addr)) {
diff --git a/Python/optimizer.c b/Python/optimizer.c
index 7315bb6b9f603d..a9e788d0dcb1d5 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -1379,7 +1379,10 @@ make_executor_from_uops(_PyThreadStateImpl *tstate,
_PyUOpInstruction *buffer, i
// linking of executor. Otherwise, the GC tries to untrack a
// still untracked object during dealloc.
_PyObject_GC_TRACK(executor);
- _Py_ExecutorInit(executor, dependencies);
+ if (_Py_ExecutorInit(executor, dependencies) < 0) {
+ Py_DECREF(executor);
+ return NULL;
+ }
#ifdef Py_DEBUG
char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
int lltrace = 0;
@@ -1646,59 +1649,63 @@ bloom_filter_may_contain(_PyBloomFilter *bloom,
_PyBloomFilter *hashes)
return true;
}
-static void
-link_executor(_PyExecutorObject *executor)
+static int
+link_executor(_PyExecutorObject *executor, const _PyBloomFilter *bloom)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
- _PyExecutorLinkListNode *links = &executor->vm_data.links;
- _PyExecutorObject *head = interp->executor_list_head;
- if (head == NULL) {
- interp->executor_list_head = executor;
- links->previous = NULL;
- links->next = NULL;
- }
- else {
- assert(head->vm_data.links.previous == NULL);
- links->previous = NULL;
- links->next = head;
- head->vm_data.links.previous = executor;
- interp->executor_list_head = executor;
- }
- /* executor_list_head must be first in list */
- assert(interp->executor_list_head->vm_data.links.previous == NULL);
+ if (interp->executor_count == interp->executor_capacity) {
+ size_t new_cap = interp->executor_capacity ? interp->executor_capacity
* 2 : 64;
+ _PyBloomFilter *new_blooms = PyMem_Realloc(
+ interp->executor_blooms, new_cap * sizeof(_PyBloomFilter));
+ if (new_blooms == NULL) {
+ return -1;
+ }
+ _PyExecutorObject **new_ptrs = PyMem_Realloc(
+ interp->executor_ptrs, new_cap * sizeof(_PyExecutorObject *));
+ if (new_ptrs == NULL) {
+ /* Revert blooms realloc — the old pointer may have been freed by
+ * a successful realloc, but new_blooms is the valid pointer. */
+ interp->executor_blooms = new_blooms;
+ return -1;
+ }
+ interp->executor_blooms = new_blooms;
+ interp->executor_ptrs = new_ptrs;
+ interp->executor_capacity = new_cap;
+ }
+ size_t idx = interp->executor_count++;
+ interp->executor_blooms[idx] = *bloom;
+ interp->executor_ptrs[idx] = executor;
+ executor->vm_data.bloom_array_idx = (int32_t)idx;
+ return 0;
}
static void
unlink_executor(_PyExecutorObject *executor)
{
- _PyExecutorLinkListNode *links = &executor->vm_data.links;
- _PyExecutorObject *next = links->next;
- _PyExecutorObject *prev = links->previous;
- if (next != NULL) {
- next->vm_data.links.previous = prev;
- }
- if (prev != NULL) {
- prev->vm_data.links.next = next;
- }
- else {
- // prev == NULL implies that executor is the list head
- PyInterpreterState *interp = PyInterpreterState_Get();
- assert(interp->executor_list_head == executor);
- interp->executor_list_head = next;
+ PyInterpreterState *interp = PyInterpreterState_Get();
+ int32_t idx = executor->vm_data.bloom_array_idx;
+ assert(idx >= 0 && (size_t)idx < interp->executor_count);
+ size_t last = --interp->executor_count;
+ if ((size_t)idx != last) {
+ /* Swap-remove: move the last element into the vacated slot */
+ interp->executor_blooms[idx] = interp->executor_blooms[last];
+ interp->executor_ptrs[idx] = interp->executor_ptrs[last];
+ interp->executor_ptrs[idx]->vm_data.bloom_array_idx = idx;
}
+ executor->vm_data.bloom_array_idx = -1;
}
/* This must be called by optimizers before using the executor */
-void
+int
_Py_ExecutorInit(_PyExecutorObject *executor, const _PyBloomFilter
*dependency_set)
{
executor->vm_data.valid = true;
executor->vm_data.pending_deletion = 0;
executor->vm_data.code = NULL;
- for (int i = 0; i < _Py_BLOOM_FILTER_WORDS; i++) {
- executor->vm_data.bloom.bits[i] = dependency_set->bits[i];
+ if (link_executor(executor, dependency_set) < 0) {
+ return -1;
}
- link_executor(executor);
+ return 0;
}
static _PyExecutorObject *
@@ -1809,11 +1816,15 @@ void
_Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj)
{
assert(executor->vm_data.valid);
- _Py_BloomFilter_Add(&executor->vm_data.bloom, obj);
+ PyInterpreterState *interp = _PyInterpreterState_GET();
+ int32_t idx = executor->vm_data.bloom_array_idx;
+ assert(idx >= 0 && (size_t)idx < interp->executor_count);
+ _Py_BloomFilter_Add(&interp->executor_blooms[idx], obj);
}
/* Invalidate all executors that depend on `obj`
- * May cause other executors to be invalidated as well
+ * May cause other executors to be invalidated as well.
+ * Uses contiguous bloom filter array for cache-friendly scanning.
*/
void
_Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int
is_invalidation)
@@ -1821,23 +1832,20 @@ _Py_Executors_InvalidateDependency(PyInterpreterState
*interp, void *obj, int is
_PyBloomFilter obj_filter;
_Py_BloomFilter_Init(&obj_filter);
_Py_BloomFilter_Add(&obj_filter, obj);
- /* Walk the list of executors */
- /* TO DO -- Use a tree to avoid traversing as many objects */
+ /* Scan contiguous bloom filter array */
PyObject *invalidate = PyList_New(0);
if (invalidate == NULL) {
goto error;
}
/* Clearing an executor can clear others, so we need to make a list of
* executors to invalidate first */
- for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) {
- assert(exec->vm_data.valid);
- _PyExecutorObject *next = exec->vm_data.links.next;
- if (bloom_filter_may_contain(&exec->vm_data.bloom, &obj_filter) &&
- PyList_Append(invalidate, (PyObject *)exec))
+ for (size_t i = 0; i < interp->executor_count; i++) {
+ assert(interp->executor_ptrs[i]->vm_data.valid);
+ if (bloom_filter_may_contain(&interp->executor_blooms[i], &obj_filter)
&&
+ PyList_Append(invalidate, (PyObject *)interp->executor_ptrs[i]))
{
goto error;
}
- exec = next;
}
for (Py_ssize_t i = 0; i < PyList_GET_SIZE(invalidate); i++) {
PyObject *exec = PyList_GET_ITEM(invalidate, i);
@@ -1859,8 +1867,9 @@ _Py_Executors_InvalidateDependency(PyInterpreterState
*interp, void *obj, int is
void
_Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation)
{
- while (interp->executor_list_head) {
- _PyExecutorObject *executor = interp->executor_list_head;
+ while (interp->executor_count > 0) {
+ /* Invalidate from the end to avoid repeated swap-remove shifts */
+ _PyExecutorObject *executor =
interp->executor_ptrs[interp->executor_count - 1];
assert(executor->vm_data.valid);
if (executor->vm_data.code) {
// Clear the entire code object so its co_executors array be freed:
@@ -1878,8 +1887,7 @@ _Py_Executors_InvalidateAll(PyInterpreterState *interp,
int is_invalidation)
void
_Py_Executors_InvalidateCold(PyInterpreterState *interp)
{
- /* Walk the list of executors */
- /* TO DO -- Use a tree to avoid traversing as many objects */
+ /* Scan contiguous executor array */
PyObject *invalidate = PyList_New(0);
if (invalidate == NULL) {
goto error;
@@ -1887,9 +1895,9 @@ _Py_Executors_InvalidateCold(PyInterpreterState *interp)
/* Clearing an executor can deallocate others, so we need to make a list of
* executors to invalidate first */
- for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) {
+ for (size_t i = 0; i < interp->executor_count; i++) {
+ _PyExecutorObject *exec = interp->executor_ptrs[i];
assert(exec->vm_data.valid);
- _PyExecutorObject *next = exec->vm_data.links.next;
if (exec->vm_data.cold && PyList_Append(invalidate, (PyObject *)exec)
< 0) {
goto error;
@@ -1897,8 +1905,6 @@ _Py_Executors_InvalidateCold(PyInterpreterState *interp)
else {
exec->vm_data.cold = true;
}
-
- exec = next;
}
for (Py_ssize_t i = 0; i < PyList_GET_SIZE(invalidate); i++) {
PyObject *exec = PyList_GET_ITEM(invalidate, i);
@@ -2142,9 +2148,8 @@ _PyDumpExecutors(FILE *out)
fprintf(out, " rankdir = \"LR\"\n\n");
fprintf(out, " node [colorscheme=greys9]\n");
PyInterpreterState *interp = PyInterpreterState_Get();
- for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) {
- executor_to_gv(exec, out);
- exec = exec->vm_data.links.next;
+ for (size_t i = 0; i < interp->executor_count; i++) {
+ executor_to_gv(interp->executor_ptrs[i], out);
}
fprintf(out, "}\n\n");
return 0;
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index 2b8e9a02cb6184..68052a91d6a1f1 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -1761,6 +1761,12 @@ finalize_modules(PyThreadState *tstate)
interp->compiling = false;
#ifdef _Py_TIER2
_Py_Executors_InvalidateAll(interp, 0);
+ PyMem_Free(interp->executor_blooms);
+ PyMem_Free(interp->executor_ptrs);
+ interp->executor_blooms = NULL;
+ interp->executor_ptrs = NULL;
+ interp->executor_count = 0;
+ interp->executor_capacity = 0;
#endif
// Stop watching __builtin__ modifications
diff --git a/Python/pystate.c b/Python/pystate.c
index 17b8430b19c188..fcb73b4dcefe1d 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -597,7 +597,10 @@ init_interpreter(PyInterpreterState *interp,
interp->_code_object_generation = 0;
interp->jit = false;
interp->compiling = false;
- interp->executor_list_head = NULL;
+ interp->executor_blooms = NULL;
+ interp->executor_ptrs = NULL;
+ interp->executor_count = 0;
+ interp->executor_capacity = 0;
interp->executor_deletion_list_head = NULL;
interp->executor_creation_counter = JIT_CLEANUP_THRESHOLD;
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]