https://github.com/python/cpython/commit/0828782773bf6beb10e7671e58d0f7426b12d487
commit: 0828782773bf6beb10e7671e58d0f7426b12d487
branch: main
author: Hai Zhu <[email protected]>
committer: markshannon <[email protected]>
date: 2026-05-21T17:06:52+01:00
summary:

gh-149358: Generating JIT recorder transformers and stabilizing family layouts (GH-149359)

files:
M Lib/test/test_generated_cases.py
M Python/optimizer.c
M Python/record_functions.c.h
M Tools/cases_generator/analyzer.py
M Tools/cases_generator/record_function_generator.py

diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py
index 9915d7be7062d1..0cc44ff95b59d8 100644
--- a/Lib/test/test_generated_cases.py
+++ b/Lib/test/test_generated_cases.py
@@ -2187,6 +2187,75 @@ def test_family_member_needs_transform_only_when_shape_changes(self):
             "[OP_TYPED_SPECIALIZED] = {1, 0, {0}}",
         )
 
+    def test_record_transform_generated_from_recording_uop(self):
+        input = """
+        tier2 op(_RECORD_TOS, (tos -- tos)) {
+            RECORD_VALUE(PyStackRef_AsPyObjectBorrow(tos));
+        }
+        tier2 op(_RECORD_TOS_TYPE, (tos -- tos)) {
+            RECORD_VALUE(Py_TYPE(PyStackRef_AsPyObjectBorrow(tos)));
+        }
+        op(_DO_STUFF, (tos -- res)) {
+            res = tos;
+        }
+        macro(OP) = _RECORD_TOS + _DO_STUFF;
+        macro(OP_SPECIALIZED) = _RECORD_TOS_TYPE + _DO_STUFF;
+        family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED };
+        """
+        output = self.generate_tables(input)
+        self.assertIn("_PyOpcode_RecordTransform_TOS_TYPE", output)
+        self.assertIn("tos = PyStackRef_FromPyObjectBorrow(recorded_value);", output)
+        self.assertIn(
+            "transformed_value = (PyObject *)Py_TYPE(PyStackRef_AsPyObjectBorrow(tos));",
+            output,
+        )
+        self.assertIn("return _PyOpcode_RecordTransform_TOS_TYPE(value);", output)
+        self.assertNotIn("record_trace_transform_to_type", output)
+
+    def test_record_transform_generated_when_only_specialization_records(self):
+        input = """
+        tier2 op(_RECORD_TOS_TYPE, (tos -- tos)) {
+            RECORD_VALUE(Py_TYPE(PyStackRef_AsPyObjectBorrow(tos)));
+        }
+        op(_DO_STUFF, (tos -- res)) {
+            res = tos;
+        }
+        macro(OP) = _DO_STUFF;
+        macro(OP_SPECIALIZED) = _RECORD_TOS_TYPE + _DO_STUFF;
+        family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED };
+        """
+        output = self.generate_tables(input)
+        # Family head must adopt the specialization's recorder.
+        self.assertIn("[OP] = {1, {_RECORD_TOS_TYPE_INDEX}}", output)
+        self.assertIn("[OP_SPECIALIZED] = {1, {_RECORD_TOS_TYPE_INDEX}}", output)
+        # Specialization consumes the slot directly (mask 0), no transform.
+        self.assert_slot_map_lines(output, "[OP_SPECIALIZED] = {1, 0, {0}}")
+        self.assertNotIn("_PyOpcode_RecordTransform_TOS_TYPE(", output)
+
+    def test_no_record_transform_when_only_base_records(self):
+        input = """
+        tier2 op(_RECORD_TOS, (tos -- tos)) {
+            RECORD_VALUE(PyStackRef_AsPyObjectBorrow(tos));
+        }
+        op(_DO_STUFF, (tos -- res)) {
+            res = tos;
+        }
+        macro(OP) = _RECORD_TOS + _DO_STUFF;
+        macro(OP_SPECIALIZED) = _DO_STUFF;
+        family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED };
+        """
+        output = self.generate_tables(input)
+        # Family head records via _RECORD_TOS.
+        self.assertIn("[OP] = {1, {_RECORD_TOS_INDEX}}", output)
+        self.assertIn("[OP_SPECIALIZED] = {1, {_RECORD_TOS_INDEX}}", output)
+        # Specialization has no consumer slot map entry (it doesn't read it).
+        self.assertNotIn(
+            "[OP_SPECIALIZED] = {1,", self.get_slot_map_section(output)
+        )
+        # No transform helpers are generated.
+        self.assertNotIn("_PyOpcode_RecordTransform_TOS(", output)
+        self.assertNotIn("_PyOpcode_RecordTransform_TOS_TYPE", output)
+
     def test_family_member_maps_positional_recorders_to_family_slots(self):
         input = """
         tier2 op(_RECORD_TOS, (sub -- sub)) {
@@ -2243,7 +2312,12 @@ def test_family_head_records_union_of_member_recorders(self):
         macro(OP_SPECIALIZED) = _RECORD_TOS + _DO_STUFF;
         family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED };
         """
+        analysis = self.analyze_input(input)
         output = self.generate_tables(input)
+        self.assertEqual(
+            analysis.families["OP"].get_member_record_names(),
+            ("_RECORD_TOS",),
+        )
         self.assertIn("[OP] = {1, {_RECORD_TOS_INDEX}}", output)
         self.assertIn("[OP_SPECIALIZED] = {1, {_RECORD_TOS_INDEX}}", output)
         self.assert_slot_map_lines(output, "[OP_SPECIALIZED] = {1, 0, {0}}")
diff --git a/Python/optimizer.c b/Python/optimizer.c
index db258fff22cdd1..e95e4b5e24b2c5 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -663,44 +663,6 @@ is_terminator(const _PyUOpInstruction *uop)
     );
 }
 
-static PyObject *
-record_trace_transform_to_type(PyObject *value)
-{
-    PyObject *tp = Py_NewRef((PyObject *)Py_TYPE(value));
-    Py_DECREF(value);
-    return tp;
-}
-
-/* _RECORD_NOS_GEN_FUNC and _RECORD_3OS_GEN_FUNC record the raw receiver.
- * If it is a generator, return its function object; otherwise return NULL.
- */
-static PyObject *
-record_trace_transform_gen_func(PyObject *value)
-{
-    PyObject *func = NULL;
-    if (PyGen_Check(value)) {
-        _PyStackRef f = ((PyGenObject *)value)->gi_iframe.f_funcobj;
-        if (!PyStackRef_IsNull(f)) {
-            func = Py_NewRef(PyStackRef_AsPyObjectBorrow(f));
-        }
-    }
-    Py_DECREF(value);
-    return func;
-}
-
-/* _RECORD_BOUND_METHOD records the raw callable.
- * Keep it only for bound methods; otherwise return NULL.
- */
-static PyObject *
-record_trace_transform_bound_method(PyObject *value)
-{
-    if (Py_TYPE(value) == &PyMethod_Type) {
-        return value;
-    }
-    Py_DECREF(value);
-    return NULL;
-}
-
 /* Returns 1 on success (added to trace), 0 on trace end.
  */
 // gh-142543: inlining this function causes stack overflows
diff --git a/Python/record_functions.c.h b/Python/record_functions.c.h
index 8cd87f00e8079e..98abe3d0505e20 100644
--- a/Python/record_functions.c.h
+++ b/Python/record_functions.c.h
@@ -259,19 +259,77 @@ const _Py_RecordFuncPtr _PyOpcode_RecordFunctions[9] = {
         [_RECORD_4OS_INDEX] = _PyOpcode_RecordFunction_4OS,
 };
 
+static PyObject *
+_PyOpcode_RecordTransform_NOS_TYPE(PyObject *recorded_value)
+{
+    PyObject *transformed_value = NULL;
+    _PyStackRef nos;
+    nos = PyStackRef_FromPyObjectBorrow(recorded_value);
+    transformed_value = (PyObject *)Py_TYPE(PyStackRef_AsPyObjectBorrow(nos));
+    Py_XINCREF(transformed_value);
+    Py_DECREF(recorded_value);
+    return transformed_value;
+}
+
+static PyObject *
+_PyOpcode_RecordTransform_TOS_TYPE(PyObject *recorded_value)
+{
+    PyObject *transformed_value = NULL;
+    _PyStackRef tos;
+    tos = PyStackRef_FromPyObjectBorrow(recorded_value);
+    transformed_value = (PyObject *)Py_TYPE(PyStackRef_AsPyObjectBorrow(tos));
+    Py_XINCREF(transformed_value);
+    Py_DECREF(recorded_value);
+    return transformed_value;
+}
+
+static PyObject *
+_PyOpcode_RecordTransform_NOS_GEN_FUNC(PyObject *recorded_value)
+{
+    PyObject *transformed_value = NULL;
+    _PyStackRef nos;
+    nos = PyStackRef_FromPyObjectBorrow(recorded_value);
+    PyObject *obj = PyStackRef_AsPyObjectBorrow(nos);
+    if (PyGen_Check(obj)) {
+        PyGenObject *gen = (PyGenObject *)obj;
+        _PyStackRef func = gen->gi_iframe.f_funcobj;
+        if (!PyStackRef_IsNull(func)) {
+            transformed_value = (PyObject *)PyStackRef_AsPyObjectBorrow(func);
+            Py_XINCREF(transformed_value);
+        }
+    }
+    Py_DECREF(recorded_value);
+    return transformed_value;
+}
+
+static PyObject *
+_PyOpcode_RecordTransform_BOUND_METHOD(PyObject *recorded_value)
+{
+    PyObject *transformed_value = NULL;
+    _PyStackRef callable;
+    callable = PyStackRef_FromPyObjectBorrow(recorded_value);
+    PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable);
+    if (Py_TYPE(callable_o) == &PyMethod_Type) {
+        transformed_value = (PyObject *)callable_o;
+        Py_XINCREF(transformed_value);
+    }
+    Py_DECREF(recorded_value);
+    return transformed_value;
+}
+
 PyObject *
 _PyOpcode_RecordTransformValue(int uop, PyObject *value)
 {
-        switch (uop) {
-                case _RECORD_TOS_TYPE:
-                case _RECORD_NOS_TYPE:
-                    return record_trace_transform_to_type(value);
-                case _RECORD_NOS_GEN_FUNC:
-                case _RECORD_3OS_GEN_FUNC:
-                    return record_trace_transform_gen_func(value);
-                case _RECORD_BOUND_METHOD:
-                    return record_trace_transform_bound_method(value);
-                default:
-                    return value;
-        }
+    switch (uop) {
+        case _RECORD_NOS_TYPE:
+            return _PyOpcode_RecordTransform_NOS_TYPE(value);
+        case _RECORD_TOS_TYPE:
+            return _PyOpcode_RecordTransform_TOS_TYPE(value);
+        case _RECORD_NOS_GEN_FUNC:
+            return _PyOpcode_RecordTransform_NOS_GEN_FUNC(value);
+        case _RECORD_BOUND_METHOD:
+            return _PyOpcode_RecordTransform_BOUND_METHOD(value);
+        default:
+            return value;
+    }
 }
diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py
index 6f0ddeaeaabf09..42459eedad6b1d 100644
--- a/Tools/cases_generator/analyzer.py
+++ b/Tools/cases_generator/analyzer.py
@@ -322,6 +322,16 @@ class Family:
     size: str
     members: list[Instruction]
 
+    def get_member_record_names(self) -> tuple[str, ...]:
+        seen: set[str] = set()
+        names: list[str] = []
+        for member in self.members:
+            for part in member.parts:
+                if part.properties.records_value and part.name not in seen:
+                    seen.add(part.name)
+                    names.append(part.name)
+        return tuple(names)
+
     def dump(self, indent: str) -> None:
         print(indent, self.name, "= ", ", ".join([m.name for m in self.members]))
 
diff --git a/Tools/cases_generator/record_function_generator.py b/Tools/cases_generator/record_function_generator.py
index 118ffa6c89caaa..b5a028384adb49 100644
--- a/Tools/cases_generator/record_function_generator.py
+++ b/Tools/cases_generator/record_function_generator.py
@@ -3,7 +3,9 @@
 
 from analyzer import (
     Analysis,
+    Family,
     Instruction,
+    Uop,
     analyze_files,
     CodeSection,
 )
@@ -19,7 +21,6 @@
 from cwriter import CWriter
 
 from tier1_generator import write_uop, Emitter, declare_variable
-from typing import TextIO
 from lexer import Token
 from stack import Stack, Storage
 
@@ -28,16 +29,6 @@
 # Must match MAX_RECORDED_VALUES in Include/internal/pycore_optimizer.h.
 MAX_RECORDED_VALUES = 3
 
-# Map `_RECORD_*` uops to the helper that converts a raw family-recorded
-# value to the form the specialized member consumes.
-_RECORD_TRANSFORM_HELPERS: dict[str, str] = {
-    "_RECORD_TOS_TYPE": "record_trace_transform_to_type",
-    "_RECORD_NOS_TYPE": "record_trace_transform_to_type",
-    "_RECORD_NOS_GEN_FUNC": "record_trace_transform_gen_func",
-    "_RECORD_3OS_GEN_FUNC": "record_trace_transform_gen_func",
-    "_RECORD_BOUND_METHOD": "record_trace_transform_bound_method",
-}
-
 # Recorder uops whose slot kind differs from the leading word of their name.
 _RECORD_SLOT_KIND_OVERRIDES: dict[str, str] = {
     "_RECORD_BOUND_METHOD": "CALLABLE",
@@ -45,9 +36,11 @@
 
 
 class RecorderEmitter(Emitter):
-    def __init__(self, out: CWriter):
+    def __init__(self, out: CWriter, target: str, incref: str):
         super().__init__(out, {})
         self._replacers["RECORD_VALUE"] = self.record_value
+        self.target = target
+        self.incref = incref
 
     def record_value(
         self,
@@ -57,13 +50,13 @@ def record_value(
         storage: Storage,
         inst: Instruction | None,
     ) -> bool:
-        lparen = next(tkn_iter)
+        next(tkn_iter)
         self.out.start_line()
-        self.emit("*recorded_value = (PyObject *)")
+        self.emit(f"{self.target} = (PyObject *)")
         emit_to(self.out, tkn_iter, "RPAREN")
         next(tkn_iter)  # Semi colon
         self.emit(";\n")
-        self.emit("Py_INCREF(*recorded_value);\n")
+        self.emit(f"{self.incref}({self.target});\n")
         return True
 
 
@@ -80,36 +73,39 @@ def get_instruction_record_names(inst: Instruction) -> list[str]:
 
 
 def get_family_record_names(
-    family_head: Instruction,
-    family_members: list[Instruction],
+    family: Family,
     instruction_records: dict[str, list[str]],
     record_slot_keys: dict[str, str],
 ) -> list[str]:
-    member_records = [instruction_records[m.name] for m in family_members]
-    head_records = instruction_records[family_head.name]
+    family_record_names = set(family.get_member_record_names())
+    family_record_names.update(instruction_records[family.name])
     records: list[str] = []
     slot_index: dict[str, int] = {}
 
     def add(name: str) -> None:
         kind = record_slot_keys[name]
+        # Prefer the raw recorder if any family instruction uses it.
+        raw = f"_RECORD_{kind}"
+        source = raw if raw in family_record_names else name
         existing = slot_index.get(kind)
         if existing is None:
             slot_index[kind] = len(records)
-            records.append(name)
-        elif records[existing] != name:
-            raw = f"_RECORD_{kind}"
+            records.append(source)
+        elif records[existing] != source:
             if raw not in record_slot_keys:
                 raise ValueError(
-                    f"Family {family_head.name} has incompatible recorders for "
-                    f"slot {kind}: {records[existing]} and {name}, "
+                    f"Family {family.name} has incompatible recorders for "
+                    f"slot {kind}: {records[existing]} and {source}, "
                     f"and no raw recorder {raw} exists to use as a base."
                 )
             records[existing] = raw
 
-    for names in member_records:
-        for name in names:
+    for member in family.members:
+        for name in instruction_records[member.name]:
             add(name)
-    for name in head_records:
+    # Family head supplies any slots no member exercises, and may also
+    # conflict with members (resolved via the raw recorder above).
+    for name in instruction_records[family.name]:
         add(name)
     return records
 
@@ -119,10 +115,11 @@ def get_record_consumer_layout(
     source_records: list[str],
     own_records: list[str],
     record_slot_keys: dict[str, str],
-) -> tuple[list[int], int]:
+) -> tuple[list[int], int, list[str]]:
     used = [False] * len(source_records)
     slot_map: list[int] = []
     transform_mask = 0
+    transform_names: list[str] = []
     for i, own in enumerate(own_records):
         own_kind = record_slot_keys[own]
         for j, src in enumerate(source_records):
@@ -131,13 +128,43 @@ def get_record_consumer_layout(
                 slot_map.append(j)
                 if src != own:
                     transform_mask |= 1 << i
+                    if own not in transform_names:
+                        transform_names.append(own)
                 break
         else:
             raise ValueError(
                 f"Instruction {inst_name} has no compatible family slot for "
                 f"{own} in {source_records}"
             )
-    return slot_map, transform_mask
+    return slot_map, transform_mask, transform_names
+
+
+def get_record_transform_input(uop: Uop) -> str:
+    inputs = [var for var in uop.stack.inputs if var.used]
+    if len(inputs) != 1 or inputs[0].is_array():
+        raise ValueError(
+            f"Recorder transform for {uop.name} needs exactly one scalar input"
+        )
+    return inputs[0].name
+
+
+def generate_record_transform_function(uop: Uop, out: CWriter) -> None:
+    input_name = get_record_transform_input(uop)
+    out.emit("static PyObject *\n")
+    out.emit(f"_PyOpcode_RecordTransform{uop.name[7:]}(PyObject *recorded_value)\n")
+    out.emit("{\n")
+    out.emit("PyObject *transformed_value = NULL;\n")
+    for var in uop.stack.inputs:
+        if var.used:
+            declare_variable(var, out)
+    out.emit(f"{input_name} = PyStackRef_FromPyObjectBorrow(recorded_value);\n")
+    emitter = RecorderEmitter(out, "transformed_value", "Py_XINCREF")
+    emitter.emit_tokens(uop, Storage(Stack(), [], [], 0, False), None, False)
+    out.start_line()
+    out.emit("Py_DECREF(recorded_value);\n")
+    out.emit("return transformed_value;\n")
+    out.emit("}\n\n")
+
 
 def generate_recorder_functions(filenames: list[str], analysis: Analysis, out: CWriter) -> None:
     write_header(__file__, filenames, out.out)
@@ -149,7 +176,7 @@ def generate_recorder_functions(filenames: list[str], analysis: Analysis, out: C
 """
     )
     args = "_PyInterpreterFrame *frame, _PyStackRef *stack_pointer, int oparg, PyObject **recorded_value"
-    emitter = RecorderEmitter(out)
+    emitter = RecorderEmitter(out, "*recorded_value", "Py_INCREF")
     nop = analysis.instructions["NOP"]
     for uop in analysis.uops.values():
         if not uop.properties.records_value:
@@ -177,8 +204,7 @@ def generate_recorder_tables(analysis: Analysis, out: CWriter) -> None:
     record_slot_keys = {name: get_record_slot_kind(name) for name in record_uop_names}
     family_record_table = {
         family.name: get_family_record_names(
-            analysis.instructions[family.name],
-            family.members,
+            family,
             instruction_records,
             record_slot_keys,
         )
@@ -188,12 +214,12 @@ def generate_recorder_tables(analysis: Analysis, out: CWriter) -> None:
     record_table: dict[str, list[str]] = {}
     record_consumer_table: dict[str, tuple[list[int], int]] = {}
     record_function_indexes: dict[str, int] = {}
+    record_transform_names: list[str] = []
     for inst in analysis.instructions.values():
         own_records = instruction_records[inst.name]
         # TRACE_RECORD runs before execution, but specialization may rewrite
-        # the opcode before translation. Record the raw family shape (union
-        # of head + members) so any opcode in the family can be translated
-        # from the same recorded layout.
+        # the opcode before translation. Use the shared family recording shape
+        # so any opcode in the family can be translated from the same layout.
         family = inst.family or analysis.families.get(inst.name)
         records = family_record_table[family.name] if family is not None else own_records
         if not records:
@@ -208,9 +234,13 @@ def generate_recorder_tables(analysis: Analysis, out: CWriter) -> None:
             if name not in record_function_indexes:
                 record_function_indexes[name] = len(record_function_indexes) + 1
         if own_records:
-            record_consumer_table[inst.name] = get_record_consumer_layout(
+            slots, mask, transform_names = get_record_consumer_layout(
                 inst.name, records, own_records, record_slot_keys
             )
+            record_consumer_table[inst.name] = (slots, mask)
+            for name in transform_names:
+                if name not in record_transform_names:
+                    record_transform_names.append(name)
 
     for name, index in record_function_indexes.items():
         out.emit(f"#define {name}_INDEX {index}\n")
@@ -238,37 +268,32 @@ def generate_recorder_tables(analysis: Analysis, out: CWriter) -> None:
     for name in record_function_indexes:
         out.emit(f"    [{name}_INDEX] = _PyOpcode_RecordFunction{name[7:]},\n")
     out.emit("};\n")
-    generate_record_transform_dispatcher(record_uop_names, out)
+    out.emit("\n")
+    for name in record_transform_names:
+        generate_record_transform_function(analysis.uops[name], out)
+    generate_record_transform_dispatcher(record_transform_names, out)
 
 
 def generate_record_transform_dispatcher(
-    record_uop_names: list[str], out: CWriter
+    transform_names: list[str], out: CWriter
 ) -> None:
     """Emit a switch that converts a family-recorded value for a recorder uop.
 
-    Only `_RECORD_*` uops that need conversion get a case; the default
-    returns the input value unchanged. Helpers live in Python/optimizer.c.
+    Only `_RECORD_*` uops that need conversion get a case; the default returns
+    the input value unchanged.
     """
-    cases: dict[str, list[str]] = {}
-    for record_name in record_uop_names:
-        helper = _RECORD_TRANSFORM_HELPERS.get(record_name)
-        if helper is None:
-            continue
-        cases.setdefault(helper, []).append(record_name)
-    out.emit("\n")
     out.emit(
         "PyObject *\n"
         "_PyOpcode_RecordTransformValue(int uop, PyObject *value)\n"
         "{\n"
     )
-    out.emit("    switch (uop) {\n")
-    for helper, names in cases.items():
-        for name in names:
-            out.emit(f"        case {name}:\n")
-        out.emit(f"            return {helper}(value);\n")
-    out.emit("        default:\n")
-    out.emit("            return value;\n")
-    out.emit("    }\n")
+    out.emit("switch (uop) {\n")
+    for name in transform_names:
+        out.emit(f"case {name}:\n")
+        out.emit(f"    return _PyOpcode_RecordTransform{name[7:]}(value);\n")
+    out.emit("default:\n")
+    out.emit("    return value;\n")
+    out.emit("}\n")
     out.emit("}\n")
 
 

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]

Reply via email to