https://github.com/python/cpython/commit/568a819f677238095441d17b9d8ac6d3ea4e3314
commit: 568a819f677238095441d17b9d8ac6d3ea4e3314
branch: main
author: Pablo Galindo Salgado <[email protected]>
committer: pablogsal <[email protected]>
date: 2025-12-17T15:12:28Z
summary:

gh-138122: Validate base frame before caching in remote debugging frame cache 
(#142852)

files:
A Misc/NEWS.d/next/Library/2025-12-17-03-03-12.gh-issue-138122.m3EF9E.rst
M Lib/test/test_profiling/test_sampling_profiler/test_integration.py
M Modules/_remote_debugging/_remote_debugging.h
M Modules/_remote_debugging/frame_cache.c
M Modules/_remote_debugging/frames.c
M Modules/_remote_debugging/threads.c

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_integration.py 
b/Lib/test/test_profiling/test_sampling_profiler/test_integration.py
index e7f6c59d5cb770..b82474858ddd4a 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_integration.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_integration.py
@@ -863,3 +863,98 @@ def test_async_aware_running_sees_only_cpu_task(self):
         self.assertGreater(cpu_percentage, 90.0,
             f"cpu_leaf should dominate samples in 'running' mode, "
             f"got {cpu_percentage:.1f}% ({cpu_leaf_samples}/{total})")
+
+
+def _generate_deep_generators_script(chain_depth=20, recurse_depth=150):
+    """Generate a script with deep nested generators for stress testing."""
+    lines = [
+        'import sys',
+        'sys.setrecursionlimit(5000)',
+        '',
+    ]
+    # Generate chain of yield-from functions
+    for i in range(chain_depth - 1):
+        lines.extend([
+            f'def deep_yield_chain_{i}(n):',
+            f'    yield ("L{i}", n)',
+            f'    yield from deep_yield_chain_{i + 1}(n)',
+            '',
+        ])
+    # Last chain function calls recursive_diver
+    lines.extend([
+        f'def deep_yield_chain_{chain_depth - 1}(n):',
+        f'    yield ("L{chain_depth - 1}", n)',
+        f'    yield from recursive_diver(n, {chain_depth})',
+        '',
+        'def recursive_diver(n, depth):',
+        '    yield (f"DIVE_{depth}", n)',
+        f'    if depth < {recurse_depth}:',
+        '        yield from recursive_diver(n, depth + 1)',
+        '    else:',
+        '        for i in range(5):',
+        '            yield (f"BOTTOM_{depth}", i)',
+        '',
+        'def oscillating_generator(iterations=1000):',
+        '    for i in range(iterations):',
+        '        yield ("OSCILLATE", i)',
+        '        yield from deep_yield_chain_0(i)',
+        '',
+        'def run_forever():',
+        '    while True:',
+        '        for _ in oscillating_generator(10):',
+        '            pass',
+        '',
+        '_test_sock.sendall(b"working")',
+        'run_forever()',
+    ])
+    return '\n'.join(lines)
+
+
+@requires_remote_subprocess_debugging()
+class TestDeepGeneratorFrameCache(unittest.TestCase):
+    """Test frame cache consistency with deep oscillating generator stacks."""
+
+    def test_all_stacks_share_same_base_frame(self):
+        """Verify all sampled stacks reach the entry point function.
+
+        When profiling deep generators that oscillate up and down the call
+        stack, every sample should include the entry point function
+        (run_forever) in its call chain. If the frame cache stores
+        incomplete stacks, some samples will be missing this base function,
+        causing broken flamegraphs.
+        """
+        script = _generate_deep_generators_script()
+        with test_subprocess(script, wait_for_working=True) as subproc:
+            collector = CollapsedStackCollector(sample_interval_usec=1, 
skip_idle=False)
+
+            with (
+                io.StringIO() as captured_output,
+                mock.patch("sys.stdout", captured_output),
+            ):
+                profiling.sampling.sample.sample(
+                    subproc.process.pid,
+                    collector,
+                    duration_sec=2,
+                )
+
+        samples_with_entry_point = 0
+        samples_without_entry_point = 0
+        total_samples = 0
+
+        for (call_tree, _thread_id), count in collector.stack_counter.items():
+            total_samples += count
+            if call_tree:
+                has_entry_point = call_tree and call_tree[0][2] == "<module>"
+                if has_entry_point:
+                    samples_with_entry_point += count
+                else:
+                    samples_without_entry_point += count
+
+        self.assertGreater(total_samples, 100,
+            f"Expected at least 100 samples, got {total_samples}")
+
+        self.assertEqual(samples_without_entry_point, 0,
+            f"Found {samples_without_entry_point}/{total_samples} samples "
+            f"missing the entry point function 'run_forever'. This indicates "
+            f"incomplete stacks are being returned, likely due to frame cache "
+            f"storing partial stack traces.")
diff --git 
a/Misc/NEWS.d/next/Library/2025-12-17-03-03-12.gh-issue-138122.m3EF9E.rst 
b/Misc/NEWS.d/next/Library/2025-12-17-03-03-12.gh-issue-138122.m3EF9E.rst
new file mode 100644
index 00000000000000..e33a761aa61825
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-12-17-03-03-12.gh-issue-138122.m3EF9E.rst
@@ -0,0 +1,4 @@
+Fix incomplete stack traces in the Tachyon profiler's frame cache when
+profiling code with deeply nested generators. The frame cache now validates
+that stack traces reach the base frame before caching, preventing broken
+flamegraphs. Patch by Pablo Galindo.
diff --git a/Modules/_remote_debugging/_remote_debugging.h 
b/Modules/_remote_debugging/_remote_debugging.h
index 2f3efedd1e0ed5..663479f235af09 100644
--- a/Modules/_remote_debugging/_remote_debugging.h
+++ b/Modules/_remote_debugging/_remote_debugging.h
@@ -429,7 +429,8 @@ extern int process_frame_chain(
     int *stopped_at_cached_frame,
     uintptr_t *frame_addrs,
     Py_ssize_t *num_addrs,
-    Py_ssize_t max_addrs
+    Py_ssize_t max_addrs,
+    uintptr_t *out_last_frame_addr
 );
 
 /* Frame cache functions */
@@ -447,18 +448,22 @@ extern int frame_cache_lookup_and_extend(
     Py_ssize_t *num_addrs,
     Py_ssize_t max_addrs);
 // Returns: 1 = stored, 0 = not stored (graceful), -1 = error
+// Only stores complete stacks that reach base_frame_addr
 extern int frame_cache_store(
     RemoteUnwinderObject *unwinder,
     uint64_t thread_id,
     PyObject *frame_list,
     const uintptr_t *addrs,
-    Py_ssize_t num_addrs);
+    Py_ssize_t num_addrs,
+    uintptr_t base_frame_addr,
+    uintptr_t last_frame_visited);
 
 extern int collect_frames_with_cache(
     RemoteUnwinderObject *unwinder,
     uintptr_t frame_addr,
     StackChunkList *chunks,
     PyObject *frame_info,
+    uintptr_t base_frame_addr,
     uintptr_t gc_frame,
     uintptr_t last_profiled_frame,
     uint64_t thread_id);
diff --git a/Modules/_remote_debugging/frame_cache.c 
b/Modules/_remote_debugging/frame_cache.c
index 4598b9dc353278..ab7891445e07a3 100644
--- a/Modules/_remote_debugging/frame_cache.c
+++ b/Modules/_remote_debugging/frame_cache.c
@@ -194,6 +194,7 @@ frame_cache_lookup_and_extend(
 }
 
 // Store frame list with addresses in cache
+// Only stores complete stacks that reach base_frame_addr (validation done 
internally)
 // Returns: 1 = stored successfully, 0 = not stored (graceful degradation), -1 
= error
 int
 frame_cache_store(
@@ -201,12 +202,25 @@ frame_cache_store(
     uint64_t thread_id,
     PyObject *frame_list,
     const uintptr_t *addrs,
-    Py_ssize_t num_addrs)
+    Py_ssize_t num_addrs,
+    uintptr_t base_frame_addr,
+    uintptr_t last_frame_visited)
 {
     if (!unwinder->frame_cache || thread_id == 0) {
         return 0;
     }
 
+    // Validate we have a complete stack before caching.
+    // Only cache if last_frame_visited matches base_frame_addr (the sentinel
+    // at the bottom of the stack). Note: we use last_frame_visited rather than
+    // addrs[num_addrs-1] because the base frame is visited but not added to 
the
+    // addrs array (it returns frame==NULL from is_frame_valid due to
+    // owner==FRAME_OWNED_BY_INTERPRETER).
+    if (base_frame_addr != 0 && last_frame_visited != base_frame_addr) {
+        // Incomplete stack - don't cache (graceful degradation)
+        return 0;
+    }
+
     // Clamp to max frames
     if (num_addrs > FRAME_CACHE_MAX_FRAMES) {
         num_addrs = FRAME_CACHE_MAX_FRAMES;
diff --git a/Modules/_remote_debugging/frames.c 
b/Modules/_remote_debugging/frames.c
index abde60c45766a5..47e34e9f945cbd 100644
--- a/Modules/_remote_debugging/frames.c
+++ b/Modules/_remote_debugging/frames.c
@@ -265,7 +265,8 @@ process_frame_chain(
     int *stopped_at_cached_frame,
     uintptr_t *frame_addrs,      // optional: C array to receive frame 
addresses
     Py_ssize_t *num_addrs,       // in/out: current count / updated count
-    Py_ssize_t max_addrs)        // max capacity of frame_addrs array
+    Py_ssize_t max_addrs,        // max capacity of frame_addrs array
+    uintptr_t *out_last_frame_addr)  // optional: receives last frame address 
visited
 {
     uintptr_t frame_addr = initial_frame_addr;
     uintptr_t prev_frame_addr = 0;
@@ -273,10 +274,13 @@ process_frame_chain(
     const size_t MAX_FRAMES = 1024 + 512;
     size_t frame_count = 0;
 
-    // Initialize output flag
+    // Initialize output parameters
     if (stopped_at_cached_frame) {
         *stopped_at_cached_frame = 0;
     }
+    if (out_last_frame_addr) {
+        *out_last_frame_addr = 0;
+    }
 
     // Quick check: if current_frame == last_profiled_frame, entire stack is 
unchanged
     if (last_profiled_frame != 0 && initial_frame_addr == last_profiled_frame) 
{
@@ -390,6 +394,11 @@ process_frame_chain(
         return -1;
     }
 
+    // Set output parameter for caller (needed for cache validation)
+    if (out_last_frame_addr) {
+        *out_last_frame_addr = last_frame_addr;
+    }
+
     return 0;
 }
 
@@ -537,6 +546,7 @@ collect_frames_with_cache(
     uintptr_t frame_addr,
     StackChunkList *chunks,
     PyObject *frame_info,
+    uintptr_t base_frame_addr,
     uintptr_t gc_frame,
     uintptr_t last_profiled_frame,
     uint64_t thread_id)
@@ -551,11 +561,13 @@ collect_frames_with_cache(
     uintptr_t addrs[FRAME_CACHE_MAX_FRAMES];
     Py_ssize_t num_addrs = 0;
     Py_ssize_t frames_before = PyList_GET_SIZE(frame_info);
+    uintptr_t last_frame_visited = 0;
 
     int stopped_at_cached = 0;
-    if (process_frame_chain(unwinder, frame_addr, chunks, frame_info, 0, 
gc_frame,
+    if (process_frame_chain(unwinder, frame_addr, chunks, frame_info, 
base_frame_addr, gc_frame,
                             last_profiled_frame, &stopped_at_cached,
-                            addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES) < 0) {
+                            addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES,
+                            &last_frame_visited) < 0) {
         return -1;
     }
 
@@ -575,23 +587,28 @@ collect_frames_with_cache(
             // Cache miss - continue walking from last_profiled_frame to get 
the rest
             STATS_INC(unwinder, frame_cache_misses);
             Py_ssize_t frames_before_walk = PyList_GET_SIZE(frame_info);
-            if (process_frame_chain(unwinder, last_profiled_frame, chunks, 
frame_info, 0, gc_frame,
-                                    0, NULL, addrs, &num_addrs, 
FRAME_CACHE_MAX_FRAMES) < 0) {
+            if (process_frame_chain(unwinder, last_profiled_frame, chunks, 
frame_info, base_frame_addr, gc_frame,
+                                    0, NULL, addrs, &num_addrs, 
FRAME_CACHE_MAX_FRAMES,
+                                    &last_frame_visited) < 0) {
                 return -1;
             }
             STATS_ADD(unwinder, frames_read_from_memory, 
PyList_GET_SIZE(frame_info) - frames_before_walk);
         } else {
-            // Partial cache hit
+            // Partial cache hit - cache was validated when stored, so we 
trust it
             STATS_INC(unwinder, frame_cache_partial_hits);
             STATS_ADD(unwinder, frames_read_from_cache, 
PyList_GET_SIZE(frame_info) - frames_before_cache);
         }
-    } else if (last_profiled_frame == 0) {
-        // No cache involvement (no last_profiled_frame or cache disabled)
-        STATS_INC(unwinder, frame_cache_misses);
+    } else {
+        if (last_profiled_frame == 0) {
+            // No cache involvement (no last_profiled_frame or cache disabled)
+            STATS_INC(unwinder, frame_cache_misses);
+        }
     }
 
-    // Store in cache (frame_cache_store handles truncation if num_addrs > 
FRAME_CACHE_MAX_FRAMES)
-    if (frame_cache_store(unwinder, thread_id, frame_info, addrs, num_addrs) < 
0) {
+    // Store in cache - frame_cache_store validates internally that we have a
+    // complete stack (reached base_frame_addr) before actually storing
+    if (frame_cache_store(unwinder, thread_id, frame_info, addrs, num_addrs,
+                          base_frame_addr, last_frame_visited) < 0) {
         return -1;
     }
 
diff --git a/Modules/_remote_debugging/threads.c 
b/Modules/_remote_debugging/threads.c
index 81c13ea48e3c49..6db774ecfc269e 100644
--- a/Modules/_remote_debugging/threads.c
+++ b/Modules/_remote_debugging/threads.c
@@ -430,7 +430,7 @@ unwind_stack_for_thread(
         uintptr_t last_profiled_frame = GET_MEMBER(uintptr_t, ts,
             unwinder->debug_offsets.thread_state.last_profiled_frame);
         if (collect_frames_with_cache(unwinder, frame_addr, &chunks, 
frame_info,
-                                      gc_frame, last_profiled_frame, tid) < 0) 
{
+                                      base_frame_addr, gc_frame, 
last_profiled_frame, tid) < 0) {
             set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to 
collect frames");
             goto error;
         }
@@ -444,7 +444,7 @@ unwind_stack_for_thread(
     } else {
         // No caching - process entire frame chain with base_frame validation
         if (process_frame_chain(unwinder, frame_addr, &chunks, frame_info,
-                                base_frame_addr, gc_frame, 0, NULL, NULL, 
NULL, 0) < 0) {
+                                base_frame_addr, gc_frame, 0, NULL, NULL, 
NULL, 0, NULL) < 0) {
             set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to 
process frame chain");
             goto error;
         }

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]

Reply via email to