https://github.com/python/cpython/commit/8cda6ae2f1f86f2d26c29586ffc9687b410abfcf
commit: 8cda6ae2f1f86f2d26c29586ffc9687b410abfcf
branch: main
author: Pablo Galindo Salgado <[email protected]>
committer: pablogsal <[email protected]>
date: 2026-06-27T16:56:31Z
summary:

gh-151613: Fix remote debugging frame cache ABA (#151614)

The remote debugging frame cache previously used only the last_profiled_frame 
address as its cache anchor. If a frame returned and a later frame reused the 
same _PyInterpreterFrame address, the profiler could accept a stale cache entry 
and splice parent frames from a different call chain into the current stack.

This adds a last_profiled_frame_seq counter next to last_profiled_frame, 
increments it when the anchor advances, stores it in frame cache entries, and 
validates cache hits against both the frame address and the sequence. Cache 
miss walks now copy stack chunks before storing new cache entries so stored 
continuations come from a stable snapshot. The new regression test exercises 
alternating call chains and checks that cached stacks never contain frames from 
both branches.

files:
A Misc/NEWS.d/next/Library/2026-06-17-22-31-57.gh-issue-151613.n0nua1.rst
M Include/cpython/pystate.h
M Include/internal/pycore_debug_offsets.h
M Include/internal/pycore_interpframe.h
M InternalDocs/frames.md
M Misc/NEWS.d/next/Library/2026-06-13-11-57-48.gh-issue-151436.UEDowO.rst
M Modules/_remote_debugging/_remote_debugging.h
M Modules/_remote_debugging/debug_offsets_validation.h
M Modules/_remote_debugging/frame_cache.c
M Modules/_remote_debugging/frames.c
M Modules/_remote_debugging/module.c
M Modules/_remote_debugging/threads.c
M Python/pystate.c

diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h
index a9d97e47e005dff..f367146e262bfe8 100644
--- a/Include/cpython/pystate.h
+++ b/Include/cpython/pystate.h
@@ -143,6 +143,7 @@ struct _ts {
     struct _PyInterpreterFrame *base_frame;
 
     struct _PyInterpreterFrame *last_profiled_frame;
+    uintptr_t last_profiled_frame_seq;
 
     Py_tracefunc c_profilefunc;
     Py_tracefunc c_tracefunc;
diff --git a/Include/internal/pycore_debug_offsets.h 
b/Include/internal/pycore_debug_offsets.h
index 18490f98a918a73..6e1eb573c8c2c83 100644
--- a/Include/internal/pycore_debug_offsets.h
+++ b/Include/internal/pycore_debug_offsets.h
@@ -104,6 +104,7 @@ typedef struct _Py_DebugOffsets {
         uint64_t current_frame;
         uint64_t base_frame;
         uint64_t last_profiled_frame;
+        uint64_t last_profiled_frame_seq;
         uint64_t thread_id;
         uint64_t native_thread_id;
         uint64_t datastack_chunk;
@@ -294,6 +295,7 @@ typedef struct _Py_DebugOffsets {
         .current_frame = offsetof(PyThreadState, current_frame), \
         .base_frame = offsetof(PyThreadState, base_frame), \
         .last_profiled_frame = offsetof(PyThreadState, last_profiled_frame), \
+        .last_profiled_frame_seq = offsetof(PyThreadState, 
last_profiled_frame_seq), \
         .thread_id = offsetof(PyThreadState, thread_id), \
         .native_thread_id = offsetof(PyThreadState, native_thread_id), \
         .datastack_chunk = offsetof(PyThreadState, datastack_chunk), \
diff --git a/Include/internal/pycore_interpframe.h 
b/Include/internal/pycore_interpframe.h
index 3608a7f3bce894a..9809cd292995f0b 100644
--- a/Include/internal/pycore_interpframe.h
+++ b/Include/internal/pycore_interpframe.h
@@ -319,12 +319,15 @@ _PyThreadState_GetFrame(PyThreadState *tstate)
 // This avoids corrupting the cache when transient frames (called and returned
 // between profiler samples) update last_profiled_frame to addresses the
 // profiler never saw.
+// The sequence distinguishes this anchor from a later frame that reuses the
+// same _PyInterpreterFrame address.
 #define _PyThreadState_UpdateLastProfiledFrame(tstate, frame, previous) \
     do { \
         PyThreadState *tstate_ = (tstate); \
         _PyInterpreterFrame *frame_ = (frame); \
         if (tstate_->last_profiled_frame == frame_) { \
             tstate_->last_profiled_frame = (previous); \
+            tstate_->last_profiled_frame_seq++; \
         } \
     } while (0)
 
diff --git a/InternalDocs/frames.md b/InternalDocs/frames.md
index 60ab2055afa7b1c..475ca75e28e1a5d 100644
--- a/InternalDocs/frames.md
+++ b/InternalDocs/frames.md
@@ -142,22 +142,26 @@ since the frame chain may have been in an inconsistent 
state due to concurrent u
 
 ### Remote Profiling Frame Cache
 
-The `last_profiled_frame` field in `PyThreadState` supports an optimization for
-remote profilers that sample call stacks from external processes. When a remote
-profiler reads the call stack, it writes the current frame address to this 
field.
-The eval loop then keeps this pointer valid by updating it to the parent frame
-whenever a frame returns (in `_PyEval_FrameClearAndPop`).
+The `last_profiled_frame` and `last_profiled_frame_seq` fields in
+`PyThreadState` support an optimization for remote profilers that sample call
+stacks from external processes. When a remote profiler reads the call stack, it
+writes the current frame address to `last_profiled_frame`. The eval loop then
+keeps this pointer valid by updating it to the parent frame whenever a frame
+returns (in `_PyEval_FrameClearAndPop`) and increments the sequence.
 
 This creates a "high-water mark" that always points to a frame still on the 
stack.
 On subsequent samples, the profiler can walk from `current_frame` until it 
reaches
-`last_profiled_frame`, knowing that frames from that point downward are 
unchanged
-and can be retrieved from a cache. This significantly reduces the amount of 
remote
-memory reads needed when call stacks are deep and stable at their base.
-
-The update in `_PyEval_FrameClearAndPop` is guarded: it only writes when
-`last_profiled_frame` is non-NULL AND matches the frame being popped. This
-prevents transient frames (called and returned between profiler samples) from
-corrupting the cache pointer, while avoiding any overhead when profiling is 
inactive.
+`last_profiled_frame`, then validate the pointer and sequence before using 
cached
+callers. This prevents a later frame that reuses the same `_PyInterpreterFrame`
+address from being mistaken for the sampled frame. The cache significantly
+reduces the amount of remote memory reads needed when call stacks are deep and
+stable at their base.
+
+The update in `_PyEval_FrameClearAndPop` is guarded: it only advances the
+pointer and sequence when `last_profiled_frame` is non-NULL AND matches the
+frame being popped. This prevents transient frames (called and returned between
+profiler samples) from corrupting the cache anchor, while avoiding any overhead
+when profiling is inactive.
 
 
 ### The Instruction Pointer
diff --git 
a/Misc/NEWS.d/next/Library/2026-06-13-11-57-48.gh-issue-151436.UEDowO.rst 
b/Misc/NEWS.d/next/Library/2026-06-13-11-57-48.gh-issue-151436.UEDowO.rst
index 1d1aadbf57be485..6b10b03ba02fdf3 100644
--- a/Misc/NEWS.d/next/Library/2026-06-13-11-57-48.gh-issue-151436.UEDowO.rst
+++ b/Misc/NEWS.d/next/Library/2026-06-13-11-57-48.gh-issue-151436.UEDowO.rst
@@ -1,4 +1,4 @@
-Fix skewed stack trackes in the Tachyon profiler when caching is enabled and
+Fix skewed stack traces in the Tachyon profiler when caching is enabled and
 when generators and coroutines are profiled, by updating
 ``tstate->last_profiled_frame`` at every frame-removal site. The issue resulted
 in total erasure of some callers. Patch by Maurycy Pawłowski-Wieroński.
diff --git 
a/Misc/NEWS.d/next/Library/2026-06-17-22-31-57.gh-issue-151613.n0nua1.rst 
b/Misc/NEWS.d/next/Library/2026-06-17-22-31-57.gh-issue-151613.n0nua1.rst
new file mode 100644
index 000000000000000..fbf3ce47ff2739f
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-06-17-22-31-57.gh-issue-151613.n0nua1.rst
@@ -0,0 +1,3 @@
+Fix another way the Tachyon profiler frame cache could produce impossible
+mixed stack traces when ``_PyInterpreterFrame`` addresses are reused, by
+validating cached frame anchors with a sequence counter.
diff --git a/Modules/_remote_debugging/_remote_debugging.h 
b/Modules/_remote_debugging/_remote_debugging.h
index 635e6e208902af5..fa37fb7b2167ecf 100644
--- a/Modules/_remote_debugging/_remote_debugging.h
+++ b/Modules/_remote_debugging/_remote_debugging.h
@@ -225,9 +225,15 @@ typedef struct {
 #define FRAME_CACHE_MAX_THREADS 32
 #define FRAME_CACHE_MAX_FRAMES 1024
 
+typedef struct {
+    uintptr_t frame;
+    uintptr_t seq;
+} FrameCacheAnchor;
+
 typedef struct {
     uint64_t thread_id;                      // 0 = empty slot
     uintptr_t thread_state_addr;
+    uintptr_t last_profiled_frame_seq;       // sequence paired with addrs[0]
     uintptr_t addrs[FRAME_CACHE_MAX_FRAMES];
     Py_ssize_t num_addrs;
     PyObject *thread_id_obj;                 // owned reference, NULL if empty
@@ -434,7 +440,7 @@ typedef struct {
     uintptr_t thread_state_addr;    // Owning thread state address
     uintptr_t base_frame_addr;      // Sentinel at bottom (for validation)
     uintptr_t gc_frame;             // GC frame address (0 if not tracking)
-    uintptr_t last_profiled_frame;  // Last cached frame (0 if no cache)
+    FrameCacheAnchor last_profiled; // Last cached frame anchor
     StackChunkList *chunks;         // Pre-copied stack chunks
     int skip_first_frame;           // Skip frame_addr itself (continue from 
its caller)
     RemoteReadPrefetch prefetch;     // Optional already-read thread/frame 
buffers
@@ -622,15 +628,21 @@ extern void frame_cache_cleanup(RemoteUnwinderObject 
*unwinder);
 extern FrameCacheEntry *frame_cache_find(RemoteUnwinderObject *unwinder, 
uint64_t thread_id);
 extern FrameCacheEntry *frame_cache_find_by_tstate(RemoteUnwinderObject 
*unwinder, uintptr_t tstate_addr);
 extern int clear_last_profiled_frames(RemoteUnwinderObject *unwinder);
+extern int set_last_profiled_frame(RemoteUnwinderObject *unwinder, uintptr_t 
tstate_addr, uintptr_t frame_addr);
 extern void frame_cache_invalidate_stale(RemoteUnwinderObject *unwinder, 
PyObject *result);
 extern int frame_cache_lookup_and_extend(
     RemoteUnwinderObject *unwinder,
     uint64_t thread_id,
-    uintptr_t last_profiled_frame,
+    uintptr_t thread_state_addr,
+    FrameCacheAnchor anchor,
     PyObject *frame_info,
     uintptr_t *frame_addrs,
     Py_ssize_t *num_addrs,
     Py_ssize_t max_addrs);
+extern int frame_cache_anchor_matches(
+    RemoteUnwinderObject *unwinder,
+    uintptr_t thread_state_addr,
+    FrameCacheAnchor anchor);
 // Returns: 1 = stored, 0 = not stored (graceful), -1 = error
 // Only stores complete stacks that reach base_frame_addr
 extern int frame_cache_store(
@@ -640,6 +652,7 @@ extern int frame_cache_store(
     const uintptr_t *addrs,
     Py_ssize_t num_addrs,
     uintptr_t thread_state_addr,
+    uintptr_t last_profiled_frame_seq,
     uintptr_t base_frame_addr,
     uintptr_t last_frame_visited);
 
diff --git a/Modules/_remote_debugging/debug_offsets_validation.h 
b/Modules/_remote_debugging/debug_offsets_validation.h
index f070f03ac459dcd..c0c01a0a639e196 100644
--- a/Modules/_remote_debugging/debug_offsets_validation.h
+++ b/Modules/_remote_debugging/debug_offsets_validation.h
@@ -31,7 +31,7 @@
 #define FIELD_SIZE(type, member) sizeof(((type *)0)->member)
 
 enum {
-    PY_REMOTE_DEBUG_OFFSETS_TOTAL_SIZE = 880,
+    PY_REMOTE_DEBUG_OFFSETS_TOTAL_SIZE = 888,
     PY_REMOTE_ASYNC_DEBUG_OFFSETS_TOTAL_SIZE = 104,
 };
 
@@ -261,7 +261,8 @@ validate_fixed_field(
     APPLY(thread_state, next, sizeof(uintptr_t), _Alignof(uintptr_t), 
buffer_size); \
     APPLY(thread_state, current_frame, sizeof(uintptr_t), _Alignof(uintptr_t), 
buffer_size); \
     APPLY(thread_state, base_frame, sizeof(uintptr_t), _Alignof(uintptr_t), 
buffer_size); \
-    APPLY(thread_state, last_profiled_frame, sizeof(uintptr_t), 
_Alignof(uintptr_t), buffer_size)
+    APPLY(thread_state, last_profiled_frame, sizeof(uintptr_t), 
_Alignof(uintptr_t), buffer_size); \
+    APPLY(thread_state, last_profiled_frame_seq, sizeof(uintptr_t), 
_Alignof(uintptr_t), buffer_size)
 
 #define PY_REMOTE_DEBUG_INTERPRETER_STATE_FIELDS(APPLY, buffer_size) \
     APPLY(interpreter_state, id, sizeof(int64_t), _Alignof(int64_t), 
buffer_size); \
diff --git a/Modules/_remote_debugging/frame_cache.c 
b/Modules/_remote_debugging/frame_cache.c
index 19fc406bca9ac96..1b2d9a60f408b91 100644
--- a/Modules/_remote_debugging/frame_cache.c
+++ b/Modules/_remote_debugging/frame_cache.c
@@ -147,25 +147,87 @@ frame_cache_invalidate_stale(RemoteUnwinderObject 
*unwinder, PyObject *result)
             Py_CLEAR(unwinder->frame_cache[i].frame_list);
             unwinder->frame_cache[i].thread_id = 0;
             unwinder->frame_cache[i].thread_state_addr = 0;
+            unwinder->frame_cache[i].last_profiled_frame_seq = 0;
             unwinder->frame_cache[i].num_addrs = 0;
             STATS_INC(unwinder, stale_cache_invalidations);
         }
     }
 }
 
+static int
+read_last_profiled_anchor(RemoteUnwinderObject *unwinder,
+                          uintptr_t thread_state_addr,
+                          FrameCacheAnchor *anchor)
+{
+    uintptr_t frame_offset = 
(uintptr_t)unwinder->debug_offsets.thread_state.last_profiled_frame;
+    uintptr_t seq_offset = 
(uintptr_t)unwinder->debug_offsets.thread_state.last_profiled_frame_seq;
+
+    // These fields are adjacent in PyThreadState. Read them together when the
+    // layout allows it so validation uses a pointer and sequence from the same
+    // remote-memory read.
+    if (seq_offset == frame_offset + sizeof(uintptr_t)) {
+        uintptr_t live_anchor[2];
+        if (_Py_RemoteDebug_ReadRemoteMemory(&unwinder->handle,
+                                             thread_state_addr + frame_offset,
+                                             sizeof(live_anchor),
+                                             live_anchor) < 0) {
+            return -1;
+        }
+        anchor->frame = live_anchor[0];
+        anchor->seq = live_anchor[1];
+        return 0;
+    }
+
+    if (read_ptr(unwinder, thread_state_addr + frame_offset, &anchor->frame) < 
0) {
+        return -1;
+    }
+    return read_ptr(unwinder, thread_state_addr + seq_offset, &anchor->seq);
+}
+
+static Py_ssize_t
+find_cached_frame_addr(const FrameCacheEntry *entry, uintptr_t frame_addr,
+                       uintptr_t *real_pops)
+{
+    *real_pops = 0;
+    for (Py_ssize_t i = 0; i < entry->num_addrs; i++) {
+        if (entry->addrs[i] == frame_addr) {
+            return i;
+        }
+        if (entry->addrs[i] != 0) {
+            (*real_pops)++;
+        }
+    }
+    return -1;
+}
+
+int
+frame_cache_anchor_matches(
+    RemoteUnwinderObject *unwinder,
+    uintptr_t thread_state_addr,
+    FrameCacheAnchor anchor)
+{
+    FrameCacheAnchor live_anchor = {0, 0};
+    if (read_last_profiled_anchor(unwinder, thread_state_addr, &live_anchor) < 
0) {
+        PyErr_Clear();
+        return 0;
+    }
+    return live_anchor.frame == anchor.frame && live_anchor.seq == anchor.seq;
+}
+
 // Find last_profiled_frame in cache and extend frame_info with cached 
continuation
 // If frame_addrs is provided (not NULL), also extends it with cached addresses
 int
 frame_cache_lookup_and_extend(
     RemoteUnwinderObject *unwinder,
     uint64_t thread_id,
-    uintptr_t last_profiled_frame,
+    uintptr_t thread_state_addr,
+    FrameCacheAnchor anchor,
     PyObject *frame_info,
     uintptr_t *frame_addrs,
     Py_ssize_t *num_addrs,
     Py_ssize_t max_addrs)
 {
-    if (!unwinder->frame_cache || last_profiled_frame == 0) {
+    if (!unwinder->frame_cache || anchor.frame == 0) {
         return 0;
     }
 
@@ -173,24 +235,31 @@ frame_cache_lookup_and_extend(
     if (!entry || !entry->frame_list) {
         return 0;
     }
+    if (entry->thread_state_addr != thread_state_addr) {
+        return 0;
+    }
 
     assert(entry->num_addrs >= 0 && entry->num_addrs <= 
FRAME_CACHE_MAX_FRAMES);
 
-    // Find the index where last_profiled_frame matches
-    Py_ssize_t start_idx = -1;
-    for (Py_ssize_t i = 0; i < entry->num_addrs; i++) {
-        if (entry->addrs[i] == last_profiled_frame) {
-            start_idx = i;
-            break;
-        }
-    }
-
+    uintptr_t real_pops = 0;
+    Py_ssize_t start_idx = find_cached_frame_addr(entry, anchor.frame, 
&real_pops);
     if (start_idx < 0) {
         return 0;  // Not found
     }
     assert(start_idx < entry->num_addrs);
 
+    // Synthetic marker frames (<native>/<GC>) are stored as addr-0 entries but
+    // never increment last_profiled_frame_seq in the target (only real frame
+    // pops do). Count the real frames before start_idx so the sequence check 
is
+    // not thrown off by markers sitting between the leaf and the anchor.
+    if (entry->last_profiled_frame_seq + real_pops != anchor.seq) {
+        return 0;
+    }
+
     Py_ssize_t num_frames = PyList_GET_SIZE(entry->frame_list);
+    if (start_idx >= num_frames) {
+        return 0;
+    }
 
     // Extend frame_info with frames ABOVE start_idx (not including it).
     // The frame at start_idx (last_profiled_frame) was the executing frame
@@ -200,6 +269,9 @@ frame_cache_lookup_and_extend(
     if (cache_start >= num_frames) {
         return 0;  // Nothing above last_profiled_frame to extend with
     }
+    if (!frame_cache_anchor_matches(unwinder, thread_state_addr, anchor)) {
+        return 0;
+    }
 
     PyObject *slice = PyList_GetSlice(entry->frame_list, cache_start, 
num_frames);
     if (!slice) {
@@ -235,6 +307,7 @@ frame_cache_store(
     const uintptr_t *addrs,
     Py_ssize_t num_addrs,
     uintptr_t thread_state_addr,
+    uintptr_t last_profiled_frame_seq,
     uintptr_t base_frame_addr,
     uintptr_t last_frame_visited)
 {
@@ -277,6 +350,7 @@ frame_cache_store(
     }
     entry->thread_id = thread_id;
     entry->thread_state_addr = thread_state_addr;
+    entry->last_profiled_frame_seq = last_profiled_frame_seq;
     if (entry->thread_id_obj == NULL) {
         entry->thread_id_obj = PyLong_FromUnsignedLongLong(thread_id);
         if (entry->thread_id_obj == NULL) {
diff --git a/Modules/_remote_debugging/frames.c 
b/Modules/_remote_debugging/frames.c
index e7d2a2764390261..46968acc6ff1feb 100644
--- a/Modules/_remote_debugging/frames.c
+++ b/Modules/_remote_debugging/frames.c
@@ -416,7 +416,7 @@ process_frame_chain(
             Py_DECREF(frame);
         }
 
-        if (ctx->last_profiled_frame != 0 && frame_addr == 
ctx->last_profiled_frame) {
+        if (ctx->last_profiled.frame != 0 && frame_addr == 
ctx->last_profiled.frame) {
             ctx->stopped_at_cached_frame = 1;
             break;
         }
@@ -437,14 +437,23 @@ process_frame_chain(
     return 0;
 }
 
-// Clear last_profiled_frame for all threads in the target process.
-// This must be called at the start of profiling to avoid stale values
-// from previous profilers causing us to stop frame walking early.
+int
+set_last_profiled_frame(RemoteUnwinderObject *unwinder, uintptr_t tstate_addr,
+                        uintptr_t frame_addr)
+{
+    uintptr_t lpf_addr = tstate_addr +
+        (uintptr_t)unwinder->debug_offsets.thread_state.last_profiled_frame;
+    return _Py_RemoteDebug_WriteRemoteMemory(&unwinder->handle, lpf_addr,
+                                             sizeof(uintptr_t), &frame_addr);
+}
+
+// Clear the profiler anchor frame for all threads in the target process.  The
+// sequence is intentionally preserved: a zero frame disables cache lookup, and
+// the next profiler-owned anchor should use the target's current generation.
 int
 clear_last_profiled_frames(RemoteUnwinderObject *unwinder)
 {
     uintptr_t current_interp = unwinder->interpreter_addr;
-    uintptr_t zero = 0;
     const size_t MAX_INTERPRETERS = 256;
     size_t interp_count = 0;
 
@@ -467,11 +476,8 @@ clear_last_profiled_frames(RemoteUnwinderObject *unwinder)
         size_t thread_count = 0;
         while (tstate_addr != 0 && thread_count < MAX_THREADS_PER_INTERP) {
             thread_count++;
-            // Clear last_profiled_frame
-            uintptr_t lpf_addr = tstate_addr + 
unwinder->debug_offsets.thread_state.last_profiled_frame;
-            if (_Py_RemoteDebug_WriteRemoteMemory(&unwinder->handle, lpf_addr,
-                                                  sizeof(uintptr_t), &zero) < 
0) {
-                // Non-fatal: just continue
+            uintptr_t no_frame = 0;
+            if (set_last_profiled_frame(unwinder, tstate_addr, no_frame) < 0) {
                 PyErr_Clear();
             }
 
@@ -512,10 +518,10 @@ try_full_cache_hit(
     const FrameWalkContext *ctx,
     uint64_t thread_id)
 {
-    if (!unwinder->frame_cache || ctx->last_profiled_frame == 0) {
+    if (!unwinder->frame_cache || ctx->last_profiled.frame == 0) {
         return 0;
     }
-    if (ctx->frame_addr != ctx->last_profiled_frame) {
+    if (ctx->frame_addr != ctx->last_profiled.frame) {
         return 0;
     }
 
@@ -523,10 +529,16 @@ try_full_cache_hit(
     if (!entry || !entry->frame_list) {
         return 0;
     }
+    if (entry->thread_state_addr != ctx->thread_state_addr) {
+        return 0;
+    }
 
     if (entry->num_addrs == 0 || entry->addrs[0] != ctx->frame_addr) {
         return 0;
     }
+    if (entry->last_profiled_frame_seq != ctx->last_profiled.seq) {
+        return 0;
+    }
 
     PyObject *current_frame = NULL;
     uintptr_t code_object_addr = 0;
@@ -544,6 +556,11 @@ try_full_cache_hit(
     if (parse_result < 0) {
         return -1;
     }
+    if (!frame_cache_anchor_matches(unwinder, ctx->thread_state_addr,
+                                    ctx->last_profiled)) {
+        Py_XDECREF(current_frame);
+        return 0;
+    }
 
     if (current_frame != NULL) {
         if (PyList_Append(ctx->frame_info, current_frame) < 0) {
@@ -582,9 +599,12 @@ collect_frames_with_cache(
 
     assert(ctx->chunks != NULL);
 
+    // Cache misses copy stack chunks before walking. Frames found there are
+    // parsed from a stable snapshot, which keeps moving stacks from seeding 
the
+    // cache with an impossible parent chain.
     if (ctx->chunks->count == 0) {
         if (copy_stack_chunks(unwinder, ctx->thread_state_addr, ctx->chunks) < 
0) {
-            PyErr_Clear();
+            return -1;
         }
     }
 
@@ -598,7 +618,9 @@ collect_frames_with_cache(
 
     if (ctx->stopped_at_cached_frame) {
         Py_ssize_t frames_before_cache = PyList_GET_SIZE(ctx->frame_info);
-        int cache_result = frame_cache_lookup_and_extend(unwinder, thread_id, 
ctx->last_profiled_frame,
+        int cache_result = frame_cache_lookup_and_extend(unwinder, thread_id,
+                                                         
ctx->thread_state_addr,
+                                                         ctx->last_profiled,
                                                          ctx->frame_info, 
ctx->frame_addrs, &ctx->num_addrs,
                                                          ctx->max_addrs);
         if (cache_result < 0) {
@@ -610,7 +632,7 @@ collect_frames_with_cache(
             // Continue walking from last_profiled_frame, skipping it (already 
processed)
             Py_ssize_t frames_before_walk = PyList_GET_SIZE(ctx->frame_info);
             FrameWalkContext continue_ctx = {
-                .frame_addr = ctx->last_profiled_frame,
+                .frame_addr = ctx->last_profiled.frame,
                 .base_frame_addr = ctx->base_frame_addr,
                 .gc_frame = ctx->gc_frame,
                 .chunks = ctx->chunks,
@@ -634,13 +656,14 @@ collect_frames_with_cache(
             STATS_ADD(unwinder, frames_read_from_cache, 
PyList_GET_SIZE(ctx->frame_info) - frames_before_cache);
         }
     } else {
-        if (ctx->last_profiled_frame == 0) {
+        if (ctx->last_profiled.frame == 0) {
             STATS_INC(unwinder, frame_cache_misses);
         }
     }
 
-    if (frame_cache_store(unwinder, thread_id, ctx->frame_info, 
ctx->frame_addrs, ctx->num_addrs,
-                          ctx->thread_state_addr, ctx->base_frame_addr,
+    if (frame_cache_store(unwinder, thread_id, ctx->frame_info, 
ctx->frame_addrs,
+                          ctx->num_addrs, ctx->thread_state_addr,
+                          ctx->last_profiled.seq, ctx->base_frame_addr,
                           ctx->last_frame_visited) < 0) {
         return -1;
     }
diff --git a/Modules/_remote_debugging/module.c 
b/Modules/_remote_debugging/module.c
index 36115f20d9d4ccc..7979cd43c6a1274 100644
--- a/Modules/_remote_debugging/module.c
+++ b/Modules/_remote_debugging/module.c
@@ -475,8 +475,8 @@ 
_remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self,
         return -1;
     }
 
-    // Clear stale last_profiled_frame values from previous profilers
-    // This prevents us from stopping frame walking early due to stale values
+    // Clear stale profiler anchors from previous profilers. This prevents us
+    // from stopping frame walking early due to stale frame pointers.
     if (cache_frames) {
         clear_last_profiled_frames(self);
     }
diff --git a/Modules/_remote_debugging/threads.c 
b/Modules/_remote_debugging/threads.c
index 81735e85395ac9e..29f22f14c9b29f8 100644
--- a/Modules/_remote_debugging/threads.c
+++ b/Modules/_remote_debugging/threads.c
@@ -503,7 +503,8 @@ unwind_stack_for_thread(
         goto error;
     }
 
-    // In cache mode, copying stack chunks is more expensive than direct 
memory reads
+    // Cache mode skips this for full hits, but cache misses copy chunks before
+    // walking so newly stored cache entries come from a stable stack snapshot.
     if (!unwinder->cache_frames) {
         if (copy_stack_chunks(unwinder, *current_tstate, &chunks) < 0) {
             set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to copy 
stack chunks");
@@ -528,18 +529,17 @@ unwind_stack_for_thread(
 
     if (unwinder->cache_frames) {
         // Use cache to avoid re-reading unchanged parent frames
-        ctx.last_profiled_frame = GET_MEMBER(uintptr_t, ts,
+        ctx.last_profiled.frame = GET_MEMBER(uintptr_t, ts,
             unwinder->debug_offsets.thread_state.last_profiled_frame);
+        ctx.last_profiled.seq = GET_MEMBER(uintptr_t, ts,
+            unwinder->debug_offsets.thread_state.last_profiled_frame_seq);
         if (collect_frames_with_cache(unwinder, &ctx, tid) < 0) {
             set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to 
collect frames");
             goto error;
         }
         // Update last_profiled_frame for next sample if it changed
-        if (frame_addr != ctx.last_profiled_frame) {
-            uintptr_t lpf_addr =
-                *current_tstate + 
(uintptr_t)unwinder->debug_offsets.thread_state.last_profiled_frame;
-            if (_Py_RemoteDebug_WriteRemoteMemory(&unwinder->handle, lpf_addr,
-                                                  sizeof(uintptr_t), 
&frame_addr) < 0) {
+        if (frame_addr != ctx.last_profiled.frame) {
+            if (set_last_profiled_frame(unwinder, *current_tstate, frame_addr) 
< 0) {
                 PyErr_Clear();  // Non-fatal
             }
         }
diff --git a/Python/pystate.c b/Python/pystate.c
index fed1df0173bacf1..29f13e92e0dd1fd 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -1605,6 +1605,8 @@ init_threadstate(_PyThreadStateImpl *_tstate,
     tstate->current_frame = &_tstate->base_frame;
     // base_frame pointer for profilers to validate stack unwinding
     tstate->base_frame = &_tstate->base_frame;
+    tstate->last_profiled_frame = NULL;
+    tstate->last_profiled_frame_seq = 0;
     tstate->datastack_chunk = NULL;
     tstate->datastack_top = NULL;
     tstate->datastack_limit = NULL;

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]

Reply via email to