https://github.com/python/cpython/commit/f9f6156c5affc039d4ee6b6f4999daf0d5896428
commit: f9f6156c5affc039d4ee6b6f4999daf0d5896428
branch: main
author: Mark Shannon <m...@hotpy.org>
committer: markshannon <m...@hotpy.org>
date: 2024-02-13T14:16:37Z
summary:

GH-113710: Backedge counter improvements. (GH-115166)

files:
M Include/cpython/optimizer.h
M Include/internal/pycore_interp.h
M Python/bytecodes.c
M Python/generated_cases.c.h
M Python/optimizer.c
M Python/pylifecycle.c
M Python/pystate.c
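
Background for the diff below: JUMP_BACKWARD keeps a 16-bit cache entry
per instruction. Its upper bits count backedge executions, while its low
OPTIMIZER_BITS_IN_COUNTER bits hold an exponential-backoff exponent that
throttles repeated optimization attempts. A minimal sketch of that layout
(illustrative helpers only, not CPython source):

    #include <stdint.h>

    #define OPTIMIZER_BITS_IN_COUNTER 4
    #define OPTIMIZER_BITS_MASK ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1)

    /* Upper 12 bits: the backedge counter, advanced by
     * (1 << OPTIMIZER_BITS_IN_COUNTER) on every backward jump. */
    static inline uint16_t counter_part(uint16_t cache) {
        return cache >> OPTIMIZER_BITS_IN_COUNTER;
    }

    /* Low 4 bits: the backoff exponent kept between attempts. */
    static inline uint16_t backoff_part(uint16_t cache) {
        return cache & OPTIMIZER_BITS_MASK;
    }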

diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h
index 3928eca583ba5b..f710ca76b2ba24 100644
--- a/Include/cpython/optimizer.h
+++ b/Include/cpython/optimizer.h
@@ -71,6 +71,8 @@ typedef struct {
 
 PyAPI_FUNC(int) PyUnstable_Replace_Executor(PyCodeObject *code, _Py_CODEUNIT *instr, _PyExecutorObject *executor);
 
+_PyOptimizerObject *_Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject* optimizer);
+
 PyAPI_FUNC(void) PyUnstable_SetOptimizer(_PyOptimizerObject* optimizer);
 
 PyAPI_FUNC(_PyOptimizerObject *) PyUnstable_GetOptimizer(void);
@@ -80,8 +82,6 @@ PyAPI_FUNC(_PyExecutorObject *) PyUnstable_GetExecutor(PyCodeObject *code, int o
 int
 _PyOptimizer_Optimize(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer);
 
-extern _PyOptimizerObject _PyOptimizer_Default;
-
 void _Py_ExecutorInit(_PyExecutorObject *, _PyBloomFilter *);
 void _Py_ExecutorClear(_PyExecutorObject *);
 void _Py_BloomFilter_Init(_PyBloomFilter *);
@@ -96,7 +96,11 @@ PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewUOpOptimizer(void);
 
 #define OPTIMIZER_BITS_IN_COUNTER 4
 /* Minimum of 16 additional executions before retry */
-#define MINIMUM_TIER2_BACKOFF 4
+#define MIN_TIER2_BACKOFF 4
+#define MAX_TIER2_BACKOFF (15 - OPTIMIZER_BITS_IN_COUNTER)
+#define OPTIMIZER_BITS_MASK ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1)
+/* A value <= UINT16_MAX but large enough that when shifted is > UINT16_MAX */
+#define OPTIMIZER_UNREACHABLE_THRESHOLD UINT16_MAX
 
 #define _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS 3
 #define _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS 6
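
The OPTIMIZER_UNREACHABLE_THRESHOLD comment is easiest to sanity-check
numerically. A small sketch, assuming the shift_and_offset_threshold()
helper that this commit adds to Python/optimizer.c further down:

    #include <assert.h>
    #include <stdint.h>

    #define OPTIMIZER_BITS_IN_COUNTER 4
    #define OPTIMIZER_UNREACHABLE_THRESHOLD UINT16_MAX

    /* Same arithmetic as the helper added in Python/optimizer.c. */
    static uint32_t shift_and_offset_threshold(uint16_t threshold) {
        return ((uint32_t)threshold << OPTIMIZER_BITS_IN_COUNTER) + (1 << 15);
    }

    int main(void) {
        /* (0xFFFF << 4) + 0x8000 == 0x107FF0; a 16-bit counter XORed
         * with 0x8000 can never reach it, so the optimizer never runs. */
        assert(shift_and_offset_threshold(OPTIMIZER_UNREACHABLE_THRESHOLD)
               > UINT16_MAX);
        return 0;
    }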
diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h
index 485b1914a44885..c244d8966f238b 100644
--- a/Include/internal/pycore_interp.h
+++ b/Include/internal/pycore_interp.h
@@ -239,8 +239,10 @@ struct _is {
     struct callable_cache callable_cache;
     _PyOptimizerObject *optimizer;
     _PyExecutorObject *executor_list_head;
-    uint16_t optimizer_resume_threshold;
-    uint16_t optimizer_backedge_threshold;
+    /* These values are shifted and offset to speed up the check in JUMP_BACKWARD */
+    uint32_t optimizer_resume_threshold;
+    uint32_t optimizer_backedge_threshold;
+
     uint32_t next_func_version;
     _rare_events rare_events;
     PyDict_WatchCallback builtins_dict_watcher;
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index f7c7e3669b7e6f..2ad5878f52e90b 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -2318,13 +2318,16 @@ dummy_func(
             assert(oparg <= INSTR_OFFSET());
             JUMPBY(-oparg);
             #if ENABLE_SPECIALIZATION
-            this_instr[1].cache += (1 << OPTIMIZER_BITS_IN_COUNTER);
+            uint16_t counter = this_instr[1].cache;
+            this_instr[1].cache = counter + (1 << OPTIMIZER_BITS_IN_COUNTER);
             /* We are using unsigned values, but we really want signed values, so
-             * do the 2s complement comparison manually */
-            uint16_t ucounter = this_instr[1].cache + (1 << 15);
-            uint16_t threshold = tstate->interp->optimizer_backedge_threshold + (1 << 15);
+             * do the 2s complement adjustment manually */
+            uint32_t offset_counter = counter ^ (1 << 15);
+            uint32_t threshold = tstate->interp->optimizer_backedge_threshold;
+            assert((threshold & OPTIMIZER_BITS_MASK) == 0);
+            // Use '>=' not '>' so that the optimizer/backoff bits do not affect the result.
             // Double-check that the opcode isn't instrumented or something:
-            if (ucounter > threshold && this_instr->op.code == JUMP_BACKWARD) {
+            if (offset_counter >= threshold && this_instr->op.code == JUMP_BACKWARD) {
                 OPT_STAT_INC(attempts);
                 _Py_CODEUNIT *start = this_instr;
                 /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */
@@ -2338,18 +2341,18 @@ dummy_func(
                     // Rewind and enter the executor:
                     assert(start->op.code == ENTER_EXECUTOR);
                     next_instr = start;
-                    this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1);
+                    this_instr[1].cache &= OPTIMIZER_BITS_MASK;
                 }
                 else {
-                    int backoff = this_instr[1].cache & ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1);
-                    if (backoff < MINIMUM_TIER2_BACKOFF) {
-                        backoff = MINIMUM_TIER2_BACKOFF;
+                    int backoff = this_instr[1].cache & OPTIMIZER_BITS_MASK;
+                    backoff++;
+                    if (backoff < MIN_TIER2_BACKOFF) {
+                        backoff = MIN_TIER2_BACKOFF;
                     }
-                    else if (backoff < 15 - OPTIMIZER_BITS_IN_COUNTER) {
-                        backoff++;
+                    else if (backoff > MAX_TIER2_BACKOFF) {
+                        backoff = MAX_TIER2_BACKOFF;
                     }
-                    assert(backoff <= 15 - OPTIMIZER_BITS_IN_COUNTER);
-                    this_instr[1].cache = ((1 << 16) - ((1 << OPTIMIZER_BITS_IN_COUNTER) << backoff)) | backoff;
+                    this_instr[1].cache = ((UINT16_MAX << OPTIMIZER_BITS_IN_COUNTER) << backoff) | backoff;
                 }
             }
             #endif  /* ENABLE_SPECIALIZATION */
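
The rewritten backoff logic above is easiest to follow by simulation.
A standalone sketch (built from the constants in this diff, not part of
the commit) counting how many backedges run before the counter crosses
the pre-shifted threshold again:

    #include <stdint.h>
    #include <stdio.h>

    #define OPTIMIZER_BITS_IN_COUNTER 4

    int main(void) {
        /* A backedge_threshold of 16, pre-shifted and offset the same
         * way as shift_and_offset_threshold() in Python/optimizer.c. */
        uint32_t threshold = (16 << OPTIMIZER_BITS_IN_COUNTER) + (1 << 15);
        /* MIN_TIER2_BACKOFF (4) up to MAX_TIER2_BACKOFF (15 - 4 = 11). */
        for (int backoff = 4; backoff <= 11; backoff++) {
            /* The value JUMP_BACKWARD writes after a failed attempt: */
            uint16_t cache = (uint16_t)(((UINT16_MAX << OPTIMIZER_BITS_IN_COUNTER)
                                         << backoff) | backoff);
            int backedges = 0;
            while ((uint32_t)(cache ^ (1 << 15)) < threshold) {
                cache += 1 << OPTIMIZER_BITS_IN_COUNTER;  /* one backward jump */
                backedges++;
            }
            /* Prints 2**backoff + 16 for each backoff value. */
            printf("backoff=%2d -> %4d backedges before retry\n",
                   backoff, backedges);
        }
        return 0;
    }

Keeping the exponent in the low bits ('| backoff') lets the next failed
attempt continue the doubling instead of restarting from the minimum.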
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index afb6650e5920fb..a49223e4db5318 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -3263,13 +3263,16 @@
             assert(oparg <= INSTR_OFFSET());
             JUMPBY(-oparg);
             #if ENABLE_SPECIALIZATION
-            this_instr[1].cache += (1 << OPTIMIZER_BITS_IN_COUNTER);
+            uint16_t counter = this_instr[1].cache;
+            this_instr[1].cache = counter + (1 << OPTIMIZER_BITS_IN_COUNTER);
             /* We are using unsigned values, but we really want signed values, so
-             * do the 2s complement comparison manually */
-            uint16_t ucounter = this_instr[1].cache + (1 << 15);
-            uint16_t threshold = tstate->interp->optimizer_backedge_threshold + (1 << 15);
+             * do the 2s complement adjustment manually */
+            uint32_t offset_counter = counter ^ (1 << 15);
+            uint32_t threshold = tstate->interp->optimizer_backedge_threshold;
+            assert((threshold & OPTIMIZER_BITS_MASK) == 0);
+            // Use '>=' not '>' so that the optimizer/backoff bits do not affect the result.
             // Double-check that the opcode isn't instrumented or something:
-            if (ucounter > threshold && this_instr->op.code == JUMP_BACKWARD) {
+            if (offset_counter >= threshold && this_instr->op.code == JUMP_BACKWARD) {
                 OPT_STAT_INC(attempts);
                 _Py_CODEUNIT *start = this_instr;
                 /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */
@@ -3283,18 +3286,18 @@
                     // Rewind and enter the executor:
                     assert(start->op.code == ENTER_EXECUTOR);
                     next_instr = start;
-                    this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1);
+                    this_instr[1].cache &= OPTIMIZER_BITS_MASK;
                 }
                 else {
-                    int backoff = this_instr[1].cache & ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1);
-                    if (backoff < MINIMUM_TIER2_BACKOFF) {
-                        backoff = MINIMUM_TIER2_BACKOFF;
+                    int backoff = this_instr[1].cache & OPTIMIZER_BITS_MASK;
+                    backoff++;
+                    if (backoff < MIN_TIER2_BACKOFF) {
+                        backoff = MIN_TIER2_BACKOFF;
                     }
-                    else if (backoff < 15 - OPTIMIZER_BITS_IN_COUNTER) {
-                        backoff++;
+                    else if (backoff > MAX_TIER2_BACKOFF) {
+                        backoff = MAX_TIER2_BACKOFF;
                     }
-                    assert(backoff <= 15 - OPTIMIZER_BITS_IN_COUNTER);
-                    this_instr[1].cache = ((1 << 16) - ((1 << OPTIMIZER_BITS_IN_COUNTER) << backoff)) | backoff;
+                    this_instr[1].cache = ((UINT16_MAX << OPTIMIZER_BITS_IN_COUNTER) << backoff) | backoff;
                 }
             }
             #endif  /* ENABLE_SPECIALIZATION */
diff --git a/Python/optimizer.c b/Python/optimizer.c
index f31f83113d3f25..13df8c170a537c 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -109,6 +109,9 @@ never_optimize(
     _PyExecutorObject **exec,
     int Py_UNUSED(stack_entries))
 {
+    /* Although it should be benign for this to be called,
+     * it shouldn't happen, so fail in debug builds. */
+    assert(0 && "never optimize should never be called");
     return 0;
 }
 
@@ -120,13 +123,19 @@ PyTypeObject _PyDefaultOptimizer_Type = {
     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
 };
 
-_PyOptimizerObject _PyOptimizer_Default = {
+static _PyOptimizerObject _PyOptimizer_Default = {
     PyObject_HEAD_INIT(&_PyDefaultOptimizer_Type)
     .optimize = never_optimize,
-    .resume_threshold = INT16_MAX,
-    .backedge_threshold = INT16_MAX,
+    .resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD,
+    .backedge_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD,
 };
 
+static uint32_t
+shift_and_offset_threshold(uint16_t threshold)
+{
+    return (threshold << OPTIMIZER_BITS_IN_COUNTER) + (1 << 15);
+}
+
 _PyOptimizerObject *
 PyUnstable_GetOptimizer(void)
 {
@@ -134,24 +143,33 @@ PyUnstable_GetOptimizer(void)
     if (interp->optimizer == &_PyOptimizer_Default) {
         return NULL;
     }
-    assert(interp->optimizer_backedge_threshold == interp->optimizer->backedge_threshold);
-    assert(interp->optimizer_resume_threshold == interp->optimizer->resume_threshold);
+    assert(interp->optimizer_backedge_threshold ==
+           shift_and_offset_threshold(interp->optimizer->backedge_threshold));
+    assert(interp->optimizer_resume_threshold ==
+           shift_and_offset_threshold(interp->optimizer->resume_threshold));
     Py_INCREF(interp->optimizer);
     return interp->optimizer;
 }
 
-void
-PyUnstable_SetOptimizer(_PyOptimizerObject *optimizer)
+_PyOptimizerObject *
+_Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject *optimizer)
 {
-    PyInterpreterState *interp = _PyInterpreterState_GET();
     if (optimizer == NULL) {
         optimizer = &_PyOptimizer_Default;
     }
     _PyOptimizerObject *old = interp->optimizer;
     Py_INCREF(optimizer);
     interp->optimizer = optimizer;
-    interp->optimizer_backedge_threshold = optimizer->backedge_threshold;
-    interp->optimizer_resume_threshold = optimizer->resume_threshold;
+    interp->optimizer_backedge_threshold = shift_and_offset_threshold(optimizer->backedge_threshold);
+    interp->optimizer_resume_threshold = shift_and_offset_threshold(optimizer->resume_threshold);
+    return old;
+}
+
+void
+PyUnstable_SetOptimizer(_PyOptimizerObject *optimizer)
+{
+    PyInterpreterState *interp = _PyInterpreterState_GET();
+    _PyOptimizerObject *old = _Py_SetOptimizer(interp, optimizer);
     Py_DECREF(old);
 }
 
@@ -860,10 +878,10 @@ PyUnstable_Optimizer_NewUOpOptimizer(void)
         return NULL;
     }
     opt->optimize = uop_optimize;
-    opt->resume_threshold = INT16_MAX;
-    // Need at least 3 iterations to settle specializations.
-    // A few lower bits of the counter are reserved for other flags.
-    opt->backedge_threshold = 16 << OPTIMIZER_BITS_IN_COUNTER;
+    opt->resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD;
+    // Need a few iterations to settle specializations,
+    // and to amortize the cost of optimization.
+    opt->backedge_threshold = 16;
     return (PyObject *)opt;
 }
 
@@ -950,7 +968,7 @@ PyUnstable_Optimizer_NewCounter(void)
         return NULL;
     }
     opt->base.optimize = counter_optimize;
-    opt->base.resume_threshold = INT16_MAX;
+    opt->base.resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD;
     opt->base.backedge_threshold = 0;
     opt->count = 0;
     return (PyObject *)opt;
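
With the setter factored out, every call site follows the same ownership
contract. A hypothetical wrapper (swap_optimizer is illustrative only and
assumes CPython's internal headers):

    /* _Py_SetOptimizer() stores a strong reference to the new optimizer
     * (or to the default one when passed NULL), recomputes the shifted
     * thresholds, and hands the previous optimizer back to the caller. */
    static void
    swap_optimizer(PyInterpreterState *interp, _PyOptimizerObject *opt)
    {
        _PyOptimizerObject *old = _Py_SetOptimizer(interp, opt);
        /* 'old' can be NULL only before the field is first initialized,
         * which is why init_interpreter() below discards the result. */
        Py_XDECREF(old);
    }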
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index 230018068d751c..7e4c07bb657d19 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -1627,8 +1627,8 @@ finalize_modules(PyThreadState *tstate)
 
     // Invalidate all executors and turn off tier 2 optimizer
     _Py_Executors_InvalidateAll(interp);
-    Py_XDECREF(interp->optimizer);
-    interp->optimizer = &_PyOptimizer_Default;
+    _PyOptimizerObject *old = _Py_SetOptimizer(interp, NULL);
+    Py_XDECREF(old);
 
     // Stop watching __builtin__ modifications
     PyDict_Unwatch(0, interp->builtins);
diff --git a/Python/pystate.c b/Python/pystate.c
index 937c43033b068d..996f465825215f 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -625,9 +625,7 @@ init_interpreter(PyInterpreterState *interp,
     }
     interp->sys_profile_initialized = false;
     interp->sys_trace_initialized = false;
-    interp->optimizer = &_PyOptimizer_Default;
-    interp->optimizer_backedge_threshold = _PyOptimizer_Default.backedge_threshold;
-    interp->optimizer_resume_threshold = _PyOptimizer_Default.backedge_threshold;
+    (void)_Py_SetOptimizer(interp, NULL);
     interp->next_func_version = 1;
     interp->executor_list_head = NULL;
     if (interp != &runtime->_main_interpreter) {
@@ -780,10 +778,8 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
         tstate->_status.cleared = 0;
     }
 
-    Py_CLEAR(interp->optimizer);
-    interp->optimizer = &_PyOptimizer_Default;
-    interp->optimizer_backedge_threshold = _PyOptimizer_Default.backedge_threshold;
-    interp->optimizer_resume_threshold = _PyOptimizer_Default.backedge_threshold;
+    _PyOptimizerObject *old = _Py_SetOptimizer(interp, NULL);
+    Py_DECREF(old);
 
     /* It is possible that any of the objects below have a finalizer
        that runs Python code or otherwise relies on a thread state
