https://github.com/python/cpython/commit/568a819f677238095441d17b9d8ac6d3ea4e3314
commit: 568a819f677238095441d17b9d8ac6d3ea4e3314
branch: main
author: Pablo Galindo Salgado <[email protected]>
committer: pablogsal <[email protected]>
date: 2025-12-17T15:12:28Z
summary:
gh-138122: Validate base frame before caching in remote debugging frame cache
(#142852)
files:
A Misc/NEWS.d/next/Library/2025-12-17-03-03-12.gh-issue-138122.m3EF9E.rst
M Lib/test/test_profiling/test_sampling_profiler/test_integration.py
M Modules/_remote_debugging/_remote_debugging.h
M Modules/_remote_debugging/frame_cache.c
M Modules/_remote_debugging/frames.c
M Modules/_remote_debugging/threads.c
diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_integration.py
b/Lib/test/test_profiling/test_sampling_profiler/test_integration.py
index e7f6c59d5cb770..b82474858ddd4a 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_integration.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_integration.py
@@ -863,3 +863,98 @@ def test_async_aware_running_sees_only_cpu_task(self):
self.assertGreater(cpu_percentage, 90.0,
f"cpu_leaf should dominate samples in 'running' mode, "
f"got {cpu_percentage:.1f}% ({cpu_leaf_samples}/{total})")
+
+
+def _generate_deep_generators_script(chain_depth=20, recurse_depth=150):
+ """Generate a script with deep nested generators for stress testing."""
+ lines = [
+ 'import sys',
+ 'sys.setrecursionlimit(5000)',
+ '',
+ ]
+ # Generate chain of yield-from functions
+ for i in range(chain_depth - 1):
+ lines.extend([
+ f'def deep_yield_chain_{i}(n):',
+ f' yield ("L{i}", n)',
+ f' yield from deep_yield_chain_{i + 1}(n)',
+ '',
+ ])
+ # Last chain function calls recursive_diver
+ lines.extend([
+ f'def deep_yield_chain_{chain_depth - 1}(n):',
+ f' yield ("L{chain_depth - 1}", n)',
+ f' yield from recursive_diver(n, {chain_depth})',
+ '',
+ 'def recursive_diver(n, depth):',
+ ' yield (f"DIVE_{depth}", n)',
+ f' if depth < {recurse_depth}:',
+ ' yield from recursive_diver(n, depth + 1)',
+ ' else:',
+ ' for i in range(5):',
+ ' yield (f"BOTTOM_{depth}", i)',
+ '',
+ 'def oscillating_generator(iterations=1000):',
+ ' for i in range(iterations):',
+ ' yield ("OSCILLATE", i)',
+ ' yield from deep_yield_chain_0(i)',
+ '',
+ 'def run_forever():',
+ ' while True:',
+ ' for _ in oscillating_generator(10):',
+ ' pass',
+ '',
+ '_test_sock.sendall(b"working")',
+ 'run_forever()',
+ ])
+ return '\n'.join(lines)
+
+
+@requires_remote_subprocess_debugging()
+class TestDeepGeneratorFrameCache(unittest.TestCase):
+ """Test frame cache consistency with deep oscillating generator stacks."""
+
+ def test_all_stacks_share_same_base_frame(self):
+ """Verify all sampled stacks reach the entry point function.
+
+ When profiling deep generators that oscillate up and down the call
+ stack, every sample should include the entry point function
+ (run_forever) in its call chain. If the frame cache stores
+ incomplete stacks, some samples will be missing this base function,
+ causing broken flamegraphs.
+ """
+ script = _generate_deep_generators_script()
+ with test_subprocess(script, wait_for_working=True) as subproc:
+ collector = CollapsedStackCollector(sample_interval_usec=1,
skip_idle=False)
+
+ with (
+ io.StringIO() as captured_output,
+ mock.patch("sys.stdout", captured_output),
+ ):
+ profiling.sampling.sample.sample(
+ subproc.process.pid,
+ collector,
+ duration_sec=2,
+ )
+
+ samples_with_entry_point = 0
+ samples_without_entry_point = 0
+ total_samples = 0
+
+ for (call_tree, _thread_id), count in collector.stack_counter.items():
+ total_samples += count
+ if call_tree:
+ has_entry_point = call_tree and call_tree[0][2] == "<module>"
+ if has_entry_point:
+ samples_with_entry_point += count
+ else:
+ samples_without_entry_point += count
+
+ self.assertGreater(total_samples, 100,
+ f"Expected at least 100 samples, got {total_samples}")
+
+ self.assertEqual(samples_without_entry_point, 0,
+ f"Found {samples_without_entry_point}/{total_samples} samples "
+ f"missing the entry point function 'run_forever'. This indicates "
+ f"incomplete stacks are being returned, likely due to frame cache "
+ f"storing partial stack traces.")
diff --git
a/Misc/NEWS.d/next/Library/2025-12-17-03-03-12.gh-issue-138122.m3EF9E.rst
b/Misc/NEWS.d/next/Library/2025-12-17-03-03-12.gh-issue-138122.m3EF9E.rst
new file mode 100644
index 00000000000000..e33a761aa61825
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-12-17-03-03-12.gh-issue-138122.m3EF9E.rst
@@ -0,0 +1,4 @@
+Fix incomplete stack traces in the Tachyon profiler's frame cache when
+profiling code with deeply nested generators. The frame cache now validates
+that stack traces reach the base frame before caching, preventing broken
+flamegraphs. Patch by Pablo Galindo.
diff --git a/Modules/_remote_debugging/_remote_debugging.h
b/Modules/_remote_debugging/_remote_debugging.h
index 2f3efedd1e0ed5..663479f235af09 100644
--- a/Modules/_remote_debugging/_remote_debugging.h
+++ b/Modules/_remote_debugging/_remote_debugging.h
@@ -429,7 +429,8 @@ extern int process_frame_chain(
int *stopped_at_cached_frame,
uintptr_t *frame_addrs,
Py_ssize_t *num_addrs,
- Py_ssize_t max_addrs
+ Py_ssize_t max_addrs,
+ uintptr_t *out_last_frame_addr
);
/* Frame cache functions */
@@ -447,18 +448,22 @@ extern int frame_cache_lookup_and_extend(
Py_ssize_t *num_addrs,
Py_ssize_t max_addrs);
// Returns: 1 = stored, 0 = not stored (graceful), -1 = error
+// Only stores complete stacks that reach base_frame_addr
extern int frame_cache_store(
RemoteUnwinderObject *unwinder,
uint64_t thread_id,
PyObject *frame_list,
const uintptr_t *addrs,
- Py_ssize_t num_addrs);
+ Py_ssize_t num_addrs,
+ uintptr_t base_frame_addr,
+ uintptr_t last_frame_visited);
extern int collect_frames_with_cache(
RemoteUnwinderObject *unwinder,
uintptr_t frame_addr,
StackChunkList *chunks,
PyObject *frame_info,
+ uintptr_t base_frame_addr,
uintptr_t gc_frame,
uintptr_t last_profiled_frame,
uint64_t thread_id);
diff --git a/Modules/_remote_debugging/frame_cache.c
b/Modules/_remote_debugging/frame_cache.c
index 4598b9dc353278..ab7891445e07a3 100644
--- a/Modules/_remote_debugging/frame_cache.c
+++ b/Modules/_remote_debugging/frame_cache.c
@@ -194,6 +194,7 @@ frame_cache_lookup_and_extend(
}
// Store frame list with addresses in cache
+// Only stores complete stacks that reach base_frame_addr (validation done
internally)
// Returns: 1 = stored successfully, 0 = not stored (graceful degradation), -1
= error
int
frame_cache_store(
@@ -201,12 +202,25 @@ frame_cache_store(
uint64_t thread_id,
PyObject *frame_list,
const uintptr_t *addrs,
- Py_ssize_t num_addrs)
+ Py_ssize_t num_addrs,
+ uintptr_t base_frame_addr,
+ uintptr_t last_frame_visited)
{
if (!unwinder->frame_cache || thread_id == 0) {
return 0;
}
+ // Validate we have a complete stack before caching.
+ // Only cache if last_frame_visited matches base_frame_addr (the sentinel
+ // at the bottom of the stack). Note: we use last_frame_visited rather than
+ // addrs[num_addrs-1] because the base frame is visited but not added to
the
+ // addrs array (it returns frame==NULL from is_frame_valid due to
+ // owner==FRAME_OWNED_BY_INTERPRETER).
+ if (base_frame_addr != 0 && last_frame_visited != base_frame_addr) {
+ // Incomplete stack - don't cache (graceful degradation)
+ return 0;
+ }
+
// Clamp to max frames
if (num_addrs > FRAME_CACHE_MAX_FRAMES) {
num_addrs = FRAME_CACHE_MAX_FRAMES;
diff --git a/Modules/_remote_debugging/frames.c
b/Modules/_remote_debugging/frames.c
index abde60c45766a5..47e34e9f945cbd 100644
--- a/Modules/_remote_debugging/frames.c
+++ b/Modules/_remote_debugging/frames.c
@@ -265,7 +265,8 @@ process_frame_chain(
int *stopped_at_cached_frame,
uintptr_t *frame_addrs, // optional: C array to receive frame
addresses
Py_ssize_t *num_addrs, // in/out: current count / updated count
- Py_ssize_t max_addrs) // max capacity of frame_addrs array
+ Py_ssize_t max_addrs, // max capacity of frame_addrs array
+ uintptr_t *out_last_frame_addr) // optional: receives last frame address
visited
{
uintptr_t frame_addr = initial_frame_addr;
uintptr_t prev_frame_addr = 0;
@@ -273,10 +274,13 @@ process_frame_chain(
const size_t MAX_FRAMES = 1024 + 512;
size_t frame_count = 0;
- // Initialize output flag
+ // Initialize output parameters
if (stopped_at_cached_frame) {
*stopped_at_cached_frame = 0;
}
+ if (out_last_frame_addr) {
+ *out_last_frame_addr = 0;
+ }
// Quick check: if current_frame == last_profiled_frame, entire stack is
unchanged
if (last_profiled_frame != 0 && initial_frame_addr == last_profiled_frame)
{
@@ -390,6 +394,11 @@ process_frame_chain(
return -1;
}
+ // Set output parameter for caller (needed for cache validation)
+ if (out_last_frame_addr) {
+ *out_last_frame_addr = last_frame_addr;
+ }
+
return 0;
}
@@ -537,6 +546,7 @@ collect_frames_with_cache(
uintptr_t frame_addr,
StackChunkList *chunks,
PyObject *frame_info,
+ uintptr_t base_frame_addr,
uintptr_t gc_frame,
uintptr_t last_profiled_frame,
uint64_t thread_id)
@@ -551,11 +561,13 @@ collect_frames_with_cache(
uintptr_t addrs[FRAME_CACHE_MAX_FRAMES];
Py_ssize_t num_addrs = 0;
Py_ssize_t frames_before = PyList_GET_SIZE(frame_info);
+ uintptr_t last_frame_visited = 0;
int stopped_at_cached = 0;
- if (process_frame_chain(unwinder, frame_addr, chunks, frame_info, 0,
gc_frame,
+ if (process_frame_chain(unwinder, frame_addr, chunks, frame_info,
base_frame_addr, gc_frame,
last_profiled_frame, &stopped_at_cached,
- addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES) < 0) {
+ addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES,
+ &last_frame_visited) < 0) {
return -1;
}
@@ -575,23 +587,28 @@ collect_frames_with_cache(
// Cache miss - continue walking from last_profiled_frame to get
the rest
STATS_INC(unwinder, frame_cache_misses);
Py_ssize_t frames_before_walk = PyList_GET_SIZE(frame_info);
- if (process_frame_chain(unwinder, last_profiled_frame, chunks,
frame_info, 0, gc_frame,
- 0, NULL, addrs, &num_addrs,
FRAME_CACHE_MAX_FRAMES) < 0) {
+ if (process_frame_chain(unwinder, last_profiled_frame, chunks,
frame_info, base_frame_addr, gc_frame,
+ 0, NULL, addrs, &num_addrs,
FRAME_CACHE_MAX_FRAMES,
+ &last_frame_visited) < 0) {
return -1;
}
STATS_ADD(unwinder, frames_read_from_memory,
PyList_GET_SIZE(frame_info) - frames_before_walk);
} else {
- // Partial cache hit
+ // Partial cache hit - cache was validated when stored, so we
trust it
STATS_INC(unwinder, frame_cache_partial_hits);
STATS_ADD(unwinder, frames_read_from_cache,
PyList_GET_SIZE(frame_info) - frames_before_cache);
}
- } else if (last_profiled_frame == 0) {
- // No cache involvement (no last_profiled_frame or cache disabled)
- STATS_INC(unwinder, frame_cache_misses);
+ } else {
+ if (last_profiled_frame == 0) {
+ // No cache involvement (no last_profiled_frame or cache disabled)
+ STATS_INC(unwinder, frame_cache_misses);
+ }
}
- // Store in cache (frame_cache_store handles truncation if num_addrs >
FRAME_CACHE_MAX_FRAMES)
- if (frame_cache_store(unwinder, thread_id, frame_info, addrs, num_addrs) <
0) {
+ // Store in cache - frame_cache_store validates internally that we have a
+ // complete stack (reached base_frame_addr) before actually storing
+ if (frame_cache_store(unwinder, thread_id, frame_info, addrs, num_addrs,
+ base_frame_addr, last_frame_visited) < 0) {
return -1;
}
diff --git a/Modules/_remote_debugging/threads.c
b/Modules/_remote_debugging/threads.c
index 81c13ea48e3c49..6db774ecfc269e 100644
--- a/Modules/_remote_debugging/threads.c
+++ b/Modules/_remote_debugging/threads.c
@@ -430,7 +430,7 @@ unwind_stack_for_thread(
uintptr_t last_profiled_frame = GET_MEMBER(uintptr_t, ts,
unwinder->debug_offsets.thread_state.last_profiled_frame);
if (collect_frames_with_cache(unwinder, frame_addr, &chunks,
frame_info,
- gc_frame, last_profiled_frame, tid) < 0)
{
+ base_frame_addr, gc_frame,
last_profiled_frame, tid) < 0) {
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to
collect frames");
goto error;
}
@@ -444,7 +444,7 @@ unwind_stack_for_thread(
} else {
// No caching - process entire frame chain with base_frame validation
if (process_frame_chain(unwinder, frame_addr, &chunks, frame_info,
- base_frame_addr, gc_frame, 0, NULL, NULL,
NULL, 0) < 0) {
+ base_frame_addr, gc_frame, 0, NULL, NULL,
NULL, 0, NULL) < 0) {
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to
process frame chain");
goto error;
}
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]