https://github.com/python/cpython/commit/198b04b75f7425c401ffe40a748688a89d28dd59
commit: 198b04b75f7425c401ffe40a748688a89d28dd59
branch: main
author: Hai Zhu <[email protected]>
committer: Fidget-Spinner <[email protected]>
date: 2026-04-03T23:54:30+08:00
summary:
gh-146073: Add fitness/exit quality mechanism for JIT trace frontend (GH-147966)
files:
M Include/cpython/pystats.h
M Include/internal/pycore_interp_structs.h
M Include/internal/pycore_optimizer.h
M Python/optimizer.c
M Python/pystate.c
M Python/pystats.c
diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h
index e473110eca7415..5d1f44988a6df1 100644
--- a/Include/cpython/pystats.h
+++ b/Include/cpython/pystats.h
@@ -144,6 +144,7 @@ typedef struct _optimization_stats {
uint64_t unknown_callee;
uint64_t trace_immediately_deopts;
uint64_t executors_invalidated;
+ uint64_t fitness_terminated_traces;
UOpStats opcode[PYSTATS_MAX_UOP_ID + 1];
uint64_t unsupported_opcode[256];
uint64_t trace_length_hist[_Py_UOP_HIST_SIZE];
diff --git a/Include/internal/pycore_interp_structs.h
b/Include/internal/pycore_interp_structs.h
index f76d4f41c55119..0cebe1b4b9e995 100644
--- a/Include/internal/pycore_interp_structs.h
+++ b/Include/internal/pycore_interp_structs.h
@@ -449,6 +449,10 @@ typedef struct _PyOptimizationConfig {
uint16_t side_exit_initial_value;
uint16_t side_exit_initial_backoff;
+ // Trace fitness thresholds
+ uint16_t fitness_initial;
+ uint16_t fitness_initial_side;
+
// Optimization flags
bool specialization_enabled;
bool uops_optimize_enabled;
diff --git a/Include/internal/pycore_optimizer.h
b/Include/internal/pycore_optimizer.h
index 2986afb142b5d1..820ee32201c1f8 100644
--- a/Include/internal/pycore_optimizer.h
+++ b/Include/internal/pycore_optimizer.h
@@ -15,6 +15,23 @@ extern "C" {
#include "pycore_optimizer_types.h"
#include <stdbool.h>
+/* Default fitness configuration values for trace quality control.
+ * FITNESS_INITIAL and FITNESS_INITIAL_SIDE can be overridden via
+ * PYTHON_JIT_FITNESS_INITIAL and PYTHON_JIT_FITNESS_INITIAL_SIDE */
+#define FITNESS_PER_INSTRUCTION 2
+#define FITNESS_BRANCH_BASE 5
+#define FITNESS_INITIAL (FITNESS_PER_INSTRUCTION * 1000)
+#define FITNESS_INITIAL_SIDE (FITNESS_INITIAL / 2)
+#define FITNESS_BACKWARD_EDGE (FITNESS_INITIAL / 10)
+
+/* Exit quality constants for fitness-based trace termination.
+ * Higher values mean better places to stop the trace. */
+
+#define EXIT_QUALITY_DEFAULT 200
+#define EXIT_QUALITY_CLOSE_LOOP (4 * EXIT_QUALITY_DEFAULT)
+#define EXIT_QUALITY_ENTER_EXECUTOR (2 * EXIT_QUALITY_DEFAULT + 100)
+#define EXIT_QUALITY_SPECIALIZABLE (EXIT_QUALITY_DEFAULT / 4)
+
typedef struct _PyJitUopBuffer {
_PyUOpInstruction *start;
@@ -101,7 +118,8 @@ typedef struct _PyJitTracerPreviousState {
} _PyJitTracerPreviousState;
typedef struct _PyJitTracerTranslatorState {
- int jump_backward_seen;
+ int32_t fitness; // Current trace fitness, starts high,
decrements
+ int frame_depth; // Current inline depth (0 = root frame)
} _PyJitTracerTranslatorState;
typedef struct _PyJitTracerState {
diff --git a/Python/optimizer.c b/Python/optimizer.c
index f09bf778587b12..c7a6b7e746545c 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -549,8 +549,6 @@ dynamic_exit_uop[MAX_UOP_ID + 1] = {
};
-#define CONFIDENCE_RANGE 1000
-#define CONFIDENCE_CUTOFF 333
#ifdef Py_DEBUG
#define DPRINTF(level, ...) \
@@ -598,6 +596,46 @@ add_to_trace(
((uint32_t)((INSTR) - ((_Py_CODEUNIT *)(CODE)->co_code_adaptive)))
+/* Compute branch fitness penalty based on how likely the traced path is.
+ * The penalty is small when the traced path is common, large when rare.
+ * A branch that historically goes the other way gets a heavy penalty. */
+static inline int
+compute_branch_penalty(uint16_t history, bool branch_taken)
+{
+ int taken_count = _Py_popcount32((uint32_t)history);
+ int on_trace_count = branch_taken ? taken_count : 16 - taken_count;
+ int off_trace = 16 - on_trace_count;
+ /* Linear scaling: off_trace ranges from 0 (fully biased our way)
+ * to 16 (fully biased against us), so the penalty ranges from
+ * FITNESS_BRANCH_BASE to FITNESS_BRANCH_BASE + 32. */
+ return FITNESS_BRANCH_BASE + off_trace * 2;
+}
+
+/* Compute exit quality for the current trace position.
+ * Higher values mean better places to stop the trace. */
+static inline int32_t
+compute_exit_quality(_Py_CODEUNIT *target_instr, int opcode,
+ const _PyJitTracerState *tracer)
+{
+ if (target_instr == tracer->initial_state.start_instr ||
+ target_instr == tracer->initial_state.close_loop_instr) {
+ return EXIT_QUALITY_CLOSE_LOOP;
+ }
+ if (target_instr->op.code == ENTER_EXECUTOR) {
+ return EXIT_QUALITY_ENTER_EXECUTOR;
+ }
+ if (_PyOpcode_Caches[_PyOpcode_Deopt[opcode]] > 0) {
+ return EXIT_QUALITY_SPECIALIZABLE;
+ }
+ return EXIT_QUALITY_DEFAULT;
+}
+
+static inline int32_t
+compute_frame_penalty(const _PyOptimizationConfig *cfg)
+{
+ return (int32_t)cfg->fitness_initial / 10 + 1;
+}
+
static int
is_terminator(const _PyUOpInstruction *uop)
{
@@ -637,6 +675,7 @@ _PyJit_translate_single_bytecode_to_trace(
_Py_CODEUNIT *this_instr = tracer->prev_state.instr;
_Py_CODEUNIT *target_instr = this_instr;
uint32_t target = 0;
+ int end_trace_opcode = _DEOPT;
target = Py_IsNone((PyObject *)old_code)
? (uint32_t)(target_instr -
_Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS_PTR)
@@ -734,16 +773,14 @@ _PyJit_translate_single_bytecode_to_trace(
DPRINTF(2, "Unsupported: oparg too large\n");
unsupported:
{
- // Rewind to previous instruction and replace with _EXIT_TRACE.
_PyUOpInstruction *curr = uop_buffer_last(trace);
while (curr->opcode != _SET_IP && uop_buffer_length(trace) > 2) {
trace->next--;
curr = uop_buffer_last(trace);
}
- assert(curr->opcode == _SET_IP || uop_buffer_length(trace) == 2);
if (curr->opcode == _SET_IP) {
int32_t old_target = (int32_t)uop_get_target(curr);
- curr->opcode = _DEOPT;
+ curr->opcode = end_trace_opcode;
curr->format = UOP_FORMAT_TARGET;
curr->target = old_target;
}
@@ -763,6 +800,23 @@ _PyJit_translate_single_bytecode_to_trace(
return 1;
}
+ // Fitness-based trace quality check (before reserving space for this
instruction)
+ _PyJitTracerTranslatorState *ts = &tracer->translator_state;
+ int32_t eq = compute_exit_quality(target_instr, opcode, tracer);
+ DPRINTF(3, "Fitness check: %s(%d) fitness=%d, exit_quality=%d, depth=%d\n",
+ _PyOpcode_OpName[opcode], oparg, ts->fitness, eq, ts->frame_depth);
+
+ // Check if fitness is depleted — should we stop the trace?
+ if (ts->fitness < eq) {
+ // This is a tracer heuristic rather than normal program control flow,
+ // so leave operand1 clear and let the resulting side exit increase
chain_depth.
+ ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
+ OPT_STAT_INC(fitness_terminated_traces);
+ DPRINTF(2, "Fitness terminated: %s(%d) fitness=%d < exit_quality=%d\n",
+ _PyOpcode_OpName[opcode], oparg, ts->fitness, eq);
+ goto done;
+ }
+
// One for possible _DEOPT, one because _CHECK_VALIDITY itself might _DEOPT
trace->end -= 2;
@@ -816,6 +870,12 @@ _PyJit_translate_single_bytecode_to_trace(
assert(jump_happened ? (next_instr == computed_jump_instr) :
(next_instr == computed_next_instr));
uint32_t uopcode = BRANCH_TO_GUARD[opcode -
POP_JUMP_IF_FALSE][jump_happened];
ADD_TO_TRACE(uopcode, 0, 0, INSTR_IP(jump_happened ?
computed_next_instr : computed_jump_instr, old_code));
+ int bp = compute_branch_penalty(target_instr[1].cache,
jump_happened);
+ tracer->translator_state.fitness -= bp;
+ DPRINTF(3, " branch penalty: -%d (history=0x%04x, taken=%d) ->
fitness=%d\n",
+ bp, target_instr[1].cache, jump_happened,
+ tracer->translator_state.fitness);
+
break;
}
case JUMP_BACKWARD_JIT:
@@ -823,6 +883,9 @@ _PyJit_translate_single_bytecode_to_trace(
case JUMP_BACKWARD_NO_JIT:
case JUMP_BACKWARD:
ADD_TO_TRACE(_CHECK_PERIODIC, 0, 0, target);
+ tracer->translator_state.fitness -= FITNESS_BACKWARD_EDGE;
+ DPRINTF(3, " backward edge penalty: -%d -> fitness=%d\n",
+ FITNESS_BACKWARD_EDGE, tracer->translator_state.fitness);
_Py_FALLTHROUGH;
case JUMP_BACKWARD_NO_INTERRUPT:
{
@@ -945,6 +1008,44 @@ _PyJit_translate_single_bytecode_to_trace(
assert(next->op.code == STORE_FAST);
operand = next->op.arg;
}
+ else if (uop == _PUSH_FRAME) {
+ _PyJitTracerTranslatorState *ts_depth =
&tracer->translator_state;
+ ts_depth->frame_depth++;
+ if (ts_depth->frame_depth >= MAX_ABSTRACT_FRAME_DEPTH) {
+ // The optimizer can't handle frames this deep,
+ // so there's no point continuing the trace.
+ DPRINTF(2, "Unsupported: frame depth %d >=
MAX_ABSTRACT_FRAME_DEPTH\n",
+ ts_depth->frame_depth);
+ end_trace_opcode = _EXIT_TRACE;
+ goto unsupported;
+ }
+ int32_t frame_penalty =
compute_frame_penalty(&tstate->interp->opt_config);
+ int32_t cost = frame_penalty * ts_depth->frame_depth;
+ ts_depth->fitness -= cost;
+ DPRINTF(3, " _PUSH_FRAME: depth=%d, penalty=-%d
(per_frame=%d) -> fitness=%d\n",
+ ts_depth->frame_depth, cost, frame_penalty,
+ ts_depth->fitness);
+ }
+ else if (uop == _RETURN_VALUE || uop == _RETURN_GENERATOR ||
uop == _YIELD_VALUE) {
+ _PyJitTracerTranslatorState *ts_depth =
&tracer->translator_state;
+ int32_t frame_penalty =
compute_frame_penalty(&tstate->interp->opt_config);
+ if (ts_depth->frame_depth <= 0) {
+ // Underflow: returning from a frame we didn't enter
+ ts_depth->fitness -= frame_penalty * 2;
+ DPRINTF(3, " %s: underflow penalty=-%d ->
fitness=%d\n",
+ _PyOpcode_uop_name[uop], frame_penalty * 2,
+ ts_depth->fitness);
+ }
+ else {
+ // Reward returning: small inlined calls should be
encouraged
+ ts_depth->fitness += frame_penalty;
+ DPRINTF(3, " %s: return reward=+%d, depth=%d ->
fitness=%d\n",
+ _PyOpcode_uop_name[uop], frame_penalty,
+ ts_depth->frame_depth - 1,
+ ts_depth->fitness);
+ }
+ ts_depth->frame_depth = ts_depth->frame_depth <= 0 ? 0 :
ts_depth->frame_depth - 1;
+ }
else if (_PyUop_Flags[uop] & HAS_RECORDS_VALUE_FLAG) {
PyObject *recorded_value =
tracer->prev_state.recorded_value;
tracer->prev_state.recorded_value = NULL;
@@ -986,7 +1087,13 @@ _PyJit_translate_single_bytecode_to_trace(
ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0);
goto done;
}
- DPRINTF(2, "Trace continuing\n");
+ // Update fitness AFTER translation, BEFORE returning to continue tracing.
+ // This ensures the next iteration's fitness check reflects the cost of
+ // all instructions translated so far.
+ tracer->translator_state.fitness -= FITNESS_PER_INSTRUCTION;
+ DPRINTF(3, " per-insn cost: -%d -> fitness=%d\n",
+ FITNESS_PER_INSTRUCTION, tracer->translator_state.fitness);
+ DPRINTF(2, "Trace continuing (fitness=%d)\n",
tracer->translator_state.fitness);
return 1;
done:
DPRINTF(2, "Trace done\n");
@@ -1069,6 +1176,17 @@ _PyJit_TryInitializeTracing(
assert(curr_instr->op.code == JUMP_BACKWARD_JIT || curr_instr->op.code ==
RESUME_CHECK_JIT || (exit != NULL));
tracer->initial_state.jump_backward_instr = curr_instr;
+ // Initialize fitness tracking state
+ const _PyOptimizationConfig *cfg = &tstate->interp->opt_config;
+ _PyJitTracerTranslatorState *ts = &tracer->translator_state;
+ bool is_side_trace = (exit != NULL);
+ ts->fitness = is_side_trace
+ ? (int32_t)cfg->fitness_initial_side
+ : (int32_t)cfg->fitness_initial;
+ ts->frame_depth = 0;
+ DPRINTF(3, "Fitness init: %s trace, fitness=%d\n",
+ is_side_trace ? "side" : "root", ts->fitness);
+
tracer->is_tracing = true;
return 1;
}
diff --git a/Python/pystate.c b/Python/pystate.c
index 143175da0f45c7..78eab7cc7d2459 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -635,6 +635,22 @@ init_interpreter(PyInterpreterState *interp,
"PYTHON_JIT_SIDE_EXIT_INITIAL_BACKOFF",
SIDE_EXIT_INITIAL_BACKOFF, 0, MAX_BACKOFF);
+ // Trace fitness configuration
+ init_policy(&interp->opt_config.fitness_initial,
+ "PYTHON_JIT_FITNESS_INITIAL",
+ FITNESS_INITIAL, 100, 10000);
+ init_policy(&interp->opt_config.fitness_initial_side,
+ "PYTHON_JIT_FITNESS_INITIAL_SIDE",
+ FITNESS_INITIAL_SIDE, 50, 5000);
+ /* The tracer starts at start_instr, so initial fitness must not be below
+ * the close-loop exit quality or tracing will terminate immediately. */
+ if (interp->opt_config.fitness_initial < EXIT_QUALITY_CLOSE_LOOP) {
+ interp->opt_config.fitness_initial = EXIT_QUALITY_CLOSE_LOOP;
+ }
+ if (interp->opt_config.fitness_initial_side < EXIT_QUALITY_CLOSE_LOOP) {
+ interp->opt_config.fitness_initial_side = EXIT_QUALITY_CLOSE_LOOP;
+ }
+
interp->opt_config.specialization_enabled =
!is_env_enabled("PYTHON_SPECIALIZATION_OFF");
interp->opt_config.uops_optimize_enabled =
!is_env_disabled("PYTHON_UOPS_OPTIMIZE");
if (interp != &runtime->_main_interpreter) {
diff --git a/Python/pystats.c b/Python/pystats.c
index a057ad884566d8..2fac2db1b738c7 100644
--- a/Python/pystats.c
+++ b/Python/pystats.c
@@ -274,6 +274,7 @@ print_optimization_stats(FILE *out, OptimizationStats
*stats)
fprintf(out, "Optimization low confidence: %" PRIu64 "\n",
stats->low_confidence);
fprintf(out, "Optimization unknown callee: %" PRIu64 "\n",
stats->unknown_callee);
fprintf(out, "Executors invalidated: %" PRIu64 "\n",
stats->executors_invalidated);
+ fprintf(out, "Optimization fitness terminated: %" PRIu64 "\n",
stats->fitness_terminated_traces);
print_histogram(out, "Trace length", stats->trace_length_hist);
print_histogram(out, "Trace run length", stats->trace_run_length_hist);
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org
Member address: [email protected]