https://github.com/python/cpython/commit/89a914c58db1661cb9da4f3b9e52c20bb4b02287
commit: 89a914c58db1661cb9da4f3b9e52c20bb4b02287
branch: main
author: Pablo Galindo Salgado <[email protected]>
committer: pablogsal <[email protected]>
date: 2025-11-17T12:46:26Z
summary:
gh-135953: Add GIL contention markers to sampling profiler Gecko format (#139485)
This commit enhances the Gecko format reporter in the sampling profiler
to include markers for GIL acquisition events.
files:
M Include/cpython/pystate.h
M Include/internal/pycore_debug_offsets.h
M Lib/profiling/sampling/collector.py
M Lib/profiling/sampling/gecko_collector.py
M Lib/profiling/sampling/sample.py
M Lib/test/test_external_inspection.py
M Lib/test/test_profiling/test_sampling_profiler.py
M Modules/_remote_debugging_module.c
M Python/ceval_gil.c
diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h
index c53abe43ebe65c..1e1e46ea4c0bcd 100644
--- a/Include/cpython/pystate.h
+++ b/Include/cpython/pystate.h
@@ -113,6 +113,9 @@ struct _ts {
/* Currently holds the GIL. Must be its own field to avoid data races */
int holds_gil;
+ /* Currently requesting the GIL */
+ int gil_requested;
+
int _whence;
/* Thread state (_Py_THREAD_ATTACHED, _Py_THREAD_DETACHED,
_Py_THREAD_SUSPENDED).
diff --git a/Include/internal/pycore_debug_offsets.h b/Include/internal/pycore_debug_offsets.h
index 8e7cd16acffa48..f6d50bf5df7a9e 100644
--- a/Include/internal/pycore_debug_offsets.h
+++ b/Include/internal/pycore_debug_offsets.h
@@ -106,6 +106,8 @@ typedef struct _Py_DebugOffsets {
uint64_t native_thread_id;
uint64_t datastack_chunk;
uint64_t status;
+ uint64_t holds_gil;
+ uint64_t gil_requested;
} thread_state;
// InterpreterFrame offset;
@@ -273,6 +275,8 @@ typedef struct _Py_DebugOffsets {
.native_thread_id = offsetof(PyThreadState, native_thread_id), \
.datastack_chunk = offsetof(PyThreadState, datastack_chunk), \
.status = offsetof(PyThreadState, _status), \
+ .holds_gil = offsetof(PyThreadState, holds_gil), \
+ .gil_requested = offsetof(PyThreadState, gil_requested), \
}, \
.interpreter_frame = { \
.size = sizeof(_PyInterpreterFrame), \
diff --git a/Lib/profiling/sampling/collector.py b/Lib/profiling/sampling/collector.py
index b7a033ac0a6637..3c2325ef77268c 100644
--- a/Lib/profiling/sampling/collector.py
+++ b/Lib/profiling/sampling/collector.py
@@ -1,17 +1,14 @@
from abc import ABC, abstractmethod
-# Enums are slow
-THREAD_STATE_RUNNING = 0
-THREAD_STATE_IDLE = 1
-THREAD_STATE_GIL_WAIT = 2
-THREAD_STATE_UNKNOWN = 3
-
-STATUS = {
- THREAD_STATE_RUNNING: "running",
- THREAD_STATE_IDLE: "idle",
- THREAD_STATE_GIL_WAIT: "gil_wait",
- THREAD_STATE_UNKNOWN: "unknown",
-}
+# Thread status flags
+try:
+ from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_UNKNOWN, THREAD_STATUS_GIL_REQUESTED
+except ImportError:
+ # Fallback for tests or when module is not available
+ THREAD_STATUS_HAS_GIL = (1 << 0)
+ THREAD_STATUS_ON_CPU = (1 << 1)
+ THREAD_STATUS_UNKNOWN = (1 << 2)
+ THREAD_STATUS_GIL_REQUESTED = (1 << 3)
class Collector(ABC):
@abstractmethod
@@ -26,8 +23,14 @@ def _iter_all_frames(self, stack_frames, skip_idle=False):
"""Iterate over all frame stacks from all interpreters and threads."""
for interpreter_info in stack_frames:
for thread_info in interpreter_info.threads:
- if skip_idle and thread_info.status != THREAD_STATE_RUNNING:
- continue
+ # skip_idle now means: skip if thread is not actively running
+ # A thread is "active" if it has the GIL OR is on CPU
+ if skip_idle:
+ status_flags = thread_info.status
+ has_gil = bool(status_flags & THREAD_STATUS_HAS_GIL)
+ on_cpu = bool(status_flags & THREAD_STATUS_ON_CPU)
+ if not (has_gil or on_cpu):
+ continue
frames = thread_info.frame_info
if frames:
yield frames, thread_info.thread_id
diff --git a/Lib/profiling/sampling/gecko_collector.py b/Lib/profiling/sampling/gecko_collector.py
index 548acbf24b7fd2..6c6700f113083e 100644
--- a/Lib/profiling/sampling/gecko_collector.py
+++ b/Lib/profiling/sampling/gecko_collector.py
@@ -1,9 +1,20 @@
+import itertools
import json
import os
import platform
+import sys
+import threading
import time
-from .collector import Collector, THREAD_STATE_RUNNING
+from .collector import Collector
+try:
+ from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_UNKNOWN, THREAD_STATUS_GIL_REQUESTED
+except ImportError:
+ # Fallback if module not available (shouldn't happen in normal use)
+ THREAD_STATUS_HAS_GIL = (1 << 0)
+ THREAD_STATUS_ON_CPU = (1 << 1)
+ THREAD_STATUS_UNKNOWN = (1 << 2)
+ THREAD_STATUS_GIL_REQUESTED = (1 << 3)
# Categories matching Firefox Profiler expectations
@@ -11,14 +22,20 @@
{"name": "Other", "color": "grey", "subcategories": ["Other"]},
{"name": "Python", "color": "yellow", "subcategories": ["Other"]},
{"name": "Native", "color": "blue", "subcategories": ["Other"]},
- {"name": "Idle", "color": "transparent", "subcategories": ["Other"]},
+ {"name": "GC", "color": "orange", "subcategories": ["Other"]},
+ {"name": "GIL", "color": "green", "subcategories": ["Other"]},
+ {"name": "CPU", "color": "purple", "subcategories": ["Other"]},
+ {"name": "Code Type", "color": "red", "subcategories": ["Other"]},
]
# Category indices
CATEGORY_OTHER = 0
CATEGORY_PYTHON = 1
CATEGORY_NATIVE = 2
-CATEGORY_IDLE = 3
+CATEGORY_GC = 3
+CATEGORY_GIL = 4
+CATEGORY_CPU = 5
+CATEGORY_CODE_TYPE = 6
# Subcategory indices
DEFAULT_SUBCATEGORY = 0
@@ -58,6 +75,56 @@ def __init__(self, *, skip_idle=False):
self.last_sample_time = 0
self.interval = 1.0 # Will be calculated from actual sampling
+ # State tracking for interval markers (tid -> start_time)
+ self.has_gil_start = {} # Thread has the GIL
+ self.no_gil_start = {} # Thread doesn't have the GIL
+ self.on_cpu_start = {} # Thread is running on CPU
+ self.off_cpu_start = {} # Thread is off CPU
+ self.python_code_start = {} # Thread running Python code (has GIL)
+ self.native_code_start = {} # Thread running native code (on CPU without GIL)
+ self.gil_wait_start = {} # Thread waiting for GIL
+
+ # GC event tracking: track GC start time per thread
+ self.gc_start_per_thread = {} # tid -> start_time
+
+ # Track which threads have been initialized for state tracking
+ self.initialized_threads = set()
+
+ def _track_state_transition(self, tid, condition, active_dict, inactive_dict,
+ active_name, inactive_name, category, current_time):
+ """Track binary state transitions and emit markers.
+
+ Args:
+ tid: Thread ID
+ condition: Whether the active state is true
+ active_dict: Dict tracking start time of active state
+ inactive_dict: Dict tracking start time of inactive state
+ active_name: Name for active state marker
+ inactive_name: Name for inactive state marker
+ category: Gecko category for the markers
+ current_time: Current timestamp
+ """
+ # On first observation of a thread, just record the current state
+ # without creating a marker (we don't know what the previous state was)
+ if tid not in self.initialized_threads:
+ if condition:
+ active_dict[tid] = current_time
+ else:
+ inactive_dict[tid] = current_time
+ return
+
+ # For already-initialized threads, track transitions
+ if condition:
+ active_dict.setdefault(tid, current_time)
+ if tid in inactive_dict:
+ self._add_marker(tid, inactive_name, inactive_dict.pop(tid),
+ current_time, category)
+ else:
+ inactive_dict.setdefault(tid, current_time)
+ if tid in active_dict:
+ self._add_marker(tid, active_name, active_dict.pop(tid),
+ current_time, category)
+
def collect(self, stack_frames):
"""Collect a sample from stack frames."""
current_time = (time.time() * 1000) - self.start_time
@@ -69,19 +136,12 @@ def collect(self, stack_frames):
) / self.sample_count
self.last_sample_time = current_time
+ # Process threads and track GC per thread
for interpreter_info in stack_frames:
for thread_info in interpreter_info.threads:
- if (
- self.skip_idle
- and thread_info.status != THREAD_STATE_RUNNING
- ):
- continue
-
frames = thread_info.frame_info
- if not frames:
- continue
-
tid = thread_info.thread_id
+ gc_collecting = thread_info.gc_collecting
# Initialize thread if needed
if tid not in self.threads:
@@ -89,6 +149,80 @@ def collect(self, stack_frames):
thread_data = self.threads[tid]
+ # Decode status flags
+ status_flags = thread_info.status
+ has_gil = bool(status_flags & THREAD_STATUS_HAS_GIL)
+ on_cpu = bool(status_flags & THREAD_STATUS_ON_CPU)
+ gil_requested = bool(status_flags &
THREAD_STATUS_GIL_REQUESTED)
+
+ # Track GIL possession (Has GIL / No GIL)
+ self._track_state_transition(
+ tid, has_gil, self.has_gil_start, self.no_gil_start,
+ "Has GIL", "No GIL", CATEGORY_GIL, current_time
+ )
+
+ # Track CPU state (On CPU / Off CPU)
+ self._track_state_transition(
+ tid, on_cpu, self.on_cpu_start, self.off_cpu_start,
+ "On CPU", "Off CPU", CATEGORY_CPU, current_time
+ )
+
+ # Track code type (Python Code / Native Code)
+ # This is tri-state: Python (has_gil), Native (on_cpu without gil), or Neither
+ if has_gil:
+ self._track_state_transition(
+ tid, True, self.python_code_start,
self.native_code_start,
+ "Python Code", "Native Code", CATEGORY_CODE_TYPE,
current_time
+ )
+ elif on_cpu:
+ self._track_state_transition(
+ tid, True, self.native_code_start,
self.python_code_start,
+ "Native Code", "Python Code", CATEGORY_CODE_TYPE,
current_time
+ )
+ else:
+ # Thread is idle (neither has GIL nor on CPU) - close any open code markers
+ # This handles the third state that _track_state_transition doesn't cover
+ if tid in self.initialized_threads:
+ if tid in self.python_code_start:
+ self._add_marker(tid, "Python Code",
self.python_code_start.pop(tid),
+ current_time, CATEGORY_CODE_TYPE)
+ if tid in self.native_code_start:
+ self._add_marker(tid, "Native Code",
self.native_code_start.pop(tid),
+ current_time, CATEGORY_CODE_TYPE)
+
+ # Track "Waiting for GIL" intervals (one-sided tracking)
+ if gil_requested:
+ self.gil_wait_start.setdefault(tid, current_time)
+ elif tid in self.gil_wait_start:
+ self._add_marker(tid, "Waiting for GIL",
self.gil_wait_start.pop(tid),
+ current_time, CATEGORY_GIL)
+
+ # Track GC events - attribute to all threads that hold the GIL during GC
+ # (GC is interpreter-wide but runs on whichever thread(s) have the GIL)
+ # If GIL switches during GC, multiple threads will get GC markers
+ if gc_collecting and has_gil:
+ # Start GC marker if not already started for this thread
+ if tid not in self.gc_start_per_thread:
+ self.gc_start_per_thread[tid] = current_time
+ elif tid in self.gc_start_per_thread:
+ # End GC marker if it was running for this thread
+ # (either GC finished or thread lost GIL)
+ self._add_marker(tid, "GC Collecting",
self.gc_start_per_thread.pop(tid),
+ current_time, CATEGORY_GC)
+
+ # Mark thread as initialized after processing all state transitions
+ self.initialized_threads.add(tid)
+
+ # Categorize: idle if neither has GIL nor on CPU
+ is_idle = not has_gil and not on_cpu
+
+ # Skip idle threads if skip_idle is enabled
+ if self.skip_idle and is_idle:
+ continue
+
+ if not frames:
+ continue
+
# Process the stack
stack_index = self._process_stack(thread_data, frames)
@@ -102,7 +236,6 @@ def collect(self, stack_frames):
def _create_thread(self, tid):
"""Create a new thread structure with processed profile format."""
- import threading
# Determine if this is the main thread
try:
@@ -181,7 +314,7 @@ def _create_thread(self, tid):
"functionSize": [],
"length": 0,
},
- # Markers - processed format
+ # Markers - processed format (arrays)
"markers": {
"data": [],
"name": [],
@@ -215,6 +348,27 @@ def _intern_string(self, s):
self.global_string_map[s] = idx
return idx
+ def _add_marker(self, tid, name, start_time, end_time, category):
+ """Add an interval marker for a specific thread."""
+ if tid not in self.threads:
+ return
+
+ thread_data = self.threads[tid]
+ duration = end_time - start_time
+
+ name_idx = self._intern_string(name)
+ markers = thread_data["markers"]
+ markers["name"].append(name_idx)
+ markers["startTime"].append(start_time)
+ markers["endTime"].append(end_time)
+ markers["phase"].append(1) # 1 = interval marker
+ markers["category"].append(category)
+ markers["data"].append({
+ "type": name.replace(" ", ""),
+ "duration": duration,
+ "tid": tid
+ })
+
def _process_stack(self, thread_data, frames):
"""Process a stack and return the stack index."""
if not frames:
@@ -383,15 +537,63 @@ def _get_or_create_frame(self, thread_data, func_idx,
lineno):
frame_cache[frame_key] = frame_idx
return frame_idx
+ def _finalize_markers(self):
+ """Close any open markers at the end of profiling."""
+ end_time = self.last_sample_time
+
+ # Close all open markers for each thread using a generic approach
+ marker_states = [
+ (self.has_gil_start, "Has GIL", CATEGORY_GIL),
+ (self.no_gil_start, "No GIL", CATEGORY_GIL),
+ (self.on_cpu_start, "On CPU", CATEGORY_CPU),
+ (self.off_cpu_start, "Off CPU", CATEGORY_CPU),
+ (self.python_code_start, "Python Code", CATEGORY_CODE_TYPE),
+ (self.native_code_start, "Native Code", CATEGORY_CODE_TYPE),
+ (self.gil_wait_start, "Waiting for GIL", CATEGORY_GIL),
+ (self.gc_start_per_thread, "GC Collecting", CATEGORY_GC),
+ ]
+
+ for state_dict, marker_name, category in marker_states:
+ for tid in list(state_dict.keys()):
+ self._add_marker(tid, marker_name, state_dict[tid], end_time,
category)
+ del state_dict[tid]
+
def export(self, filename):
"""Export the profile to a Gecko JSON file."""
+
if self.sample_count > 0 and self.last_sample_time > 0:
self.interval = self.last_sample_time / self.sample_count
- profile = self._build_profile()
+ # Spinner for progress indication
+ spinner = itertools.cycle(['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧',
'⠇', '⠏'])
+ stop_spinner = threading.Event()
+
+ def spin():
+ message = 'Building Gecko profile...'
+ while not stop_spinner.is_set():
+ sys.stderr.write(f'\r{next(spinner)} {message}')
+ sys.stderr.flush()
+ time.sleep(0.1)
+ # Clear the spinner line
+ sys.stderr.write('\r' + ' ' * (len(message) + 3) + '\r')
+ sys.stderr.flush()
+
+ spinner_thread = threading.Thread(target=spin, daemon=True)
+ spinner_thread.start()
+
+ try:
+ # Finalize any open markers before building profile
+ self._finalize_markers()
+
+ profile = self._build_profile()
- with open(filename, "w") as f:
- json.dump(profile, f, separators=(",", ":"))
+ with open(filename, "w") as f:
+ json.dump(profile, f, separators=(",", ":"))
+ finally:
+ stop_spinner.set()
+ spinner_thread.join(timeout=1.0)
+ # Small delay to ensure the clear happens
+ time.sleep(0.01)
print(f"Gecko profile written to {filename}")
print(
@@ -416,6 +618,7 @@ def _build_profile(self):
frame_table["length"] = len(frame_table["func"])
func_table["length"] = len(func_table["name"])
resource_table["length"] = len(resource_table["name"])
+ thread_data["markers"]["length"] =
len(thread_data["markers"]["name"])
# Clean up internal caches
del thread_data["_stackCache"]
diff --git a/Lib/profiling/sampling/sample.py b/Lib/profiling/sampling/sample.py
index 7a0f739a5428c6..5ca68911d8a482 100644
--- a/Lib/profiling/sampling/sample.py
+++ b/Lib/profiling/sampling/sample.py
@@ -21,6 +21,7 @@
PROFILING_MODE_WALL = 0
PROFILING_MODE_CPU = 1
PROFILING_MODE_GIL = 2
+PROFILING_MODE_ALL = 3 # Combines GIL + CPU checks
def _parse_mode(mode_string):
@@ -136,18 +137,20 @@ def _run_with_sync(original_cmd):
class SampleProfiler:
- def __init__(self, pid, sample_interval_usec, all_threads, *,
mode=PROFILING_MODE_WALL):
+ def __init__(self, pid, sample_interval_usec, all_threads, *,
mode=PROFILING_MODE_WALL, skip_non_matching_threads=True):
self.pid = pid
self.sample_interval_usec = sample_interval_usec
self.all_threads = all_threads
if _FREE_THREADED_BUILD:
self.unwinder = _remote_debugging.RemoteUnwinder(
- self.pid, all_threads=self.all_threads, mode=mode
+ self.pid, all_threads=self.all_threads, mode=mode,
+ skip_non_matching_threads=skip_non_matching_threads
)
else:
only_active_threads = bool(self.all_threads)
self.unwinder = _remote_debugging.RemoteUnwinder(
- self.pid, only_active_thread=only_active_threads, mode=mode
+ self.pid, only_active_thread=only_active_threads, mode=mode,
+ skip_non_matching_threads=skip_non_matching_threads
)
# Track sample intervals and total sample count
self.sample_intervals = deque(maxlen=100)
@@ -614,14 +617,21 @@ def sample(
realtime_stats=False,
mode=PROFILING_MODE_WALL,
):
+ # PROFILING_MODE_ALL implies no skipping at all
+ if mode == PROFILING_MODE_ALL:
+ skip_non_matching_threads = False
+ skip_idle = False
+ else:
+ # Determine skip settings based on output format and mode
+ skip_non_matching_threads = output_format != "gecko"
+ skip_idle = mode != PROFILING_MODE_WALL
+
profiler = SampleProfiler(
- pid, sample_interval_usec, all_threads=all_threads, mode=mode
+ pid, sample_interval_usec, all_threads=all_threads, mode=mode,
+ skip_non_matching_threads=skip_non_matching_threads
)
profiler.realtime_stats = realtime_stats
- # Determine skip_idle for collector compatibility
- skip_idle = mode != PROFILING_MODE_WALL
-
collector = None
match output_format:
case "pstats":
@@ -633,7 +643,8 @@ def sample(
collector = FlamegraphCollector(skip_idle=skip_idle)
filename = filename or f"flamegraph.{pid}.html"
case "gecko":
- collector = GeckoCollector(skip_idle=skip_idle)
+ # Gecko format never skips idle threads to show full thread states
+ collector = GeckoCollector(skip_idle=False)
filename = filename or f"gecko.{pid}.json"
case _:
raise ValueError(f"Invalid output format: {output_format}")
@@ -882,6 +893,10 @@ def main():
if args.format in ("collapsed", "gecko"):
_validate_collapsed_format_args(args, parser)
+ # Validate that --mode is not used with --gecko
+ if args.format == "gecko" and args.mode != "wall":
+ parser.error("--mode option is incompatible with --gecko format. Gecko
format automatically uses ALL mode (GIL + CPU analysis).")
+
sort_value = args.sort if args.sort is not None else 2
if args.module is not None and not args.module:
@@ -900,7 +915,11 @@ def main():
elif target_count > 1:
parser.error("only one target type can be specified: -p/--pid,
-m/--module, or script")
- mode = _parse_mode(args.mode)
+ # Use PROFILING_MODE_ALL for gecko format, otherwise parse user's choice
+ if args.format == "gecko":
+ mode = PROFILING_MODE_ALL
+ else:
+ mode = _parse_mode(args.mode)
if args.pid:
sample(
diff --git a/Lib/test/test_external_inspection.py b/Lib/test/test_external_inspection.py
index 01720457e61f5c..60e5000cd72a32 100644
--- a/Lib/test/test_external_inspection.py
+++ b/Lib/test/test_external_inspection.py
@@ -23,6 +23,12 @@
PROFILING_MODE_WALL = 0
PROFILING_MODE_CPU = 1
PROFILING_MODE_GIL = 2
+PROFILING_MODE_ALL = 3
+
+# Thread status flags
+THREAD_STATUS_HAS_GIL = (1 << 0)
+THREAD_STATUS_ON_CPU = (1 << 1)
+THREAD_STATUS_UNKNOWN = (1 << 2)
try:
from concurrent import interpreters
@@ -1763,11 +1769,14 @@ def busy():
for thread_info in interpreter_info.threads:
statuses[thread_info.thread_id] =
thread_info.status
- # Check if sleeper thread is idle and busy thread is
running
+ # Check if sleeper thread is off CPU and busy thread
is on CPU
+ # In the new flags system:
+ # - sleeper should NOT have ON_CPU flag (off CPU)
+ # - busy should have ON_CPU flag
if (sleeper_tid in statuses and
busy_tid in statuses and
- statuses[sleeper_tid] == 1 and
- statuses[busy_tid] == 0):
+ not (statuses[sleeper_tid] & THREAD_STATUS_ON_CPU)
and
+ (statuses[busy_tid] & THREAD_STATUS_ON_CPU)):
break
time.sleep(0.5) # Give a bit of time to let threads
settle
except PermissionError:
@@ -1779,8 +1788,8 @@ def busy():
self.assertIsNotNone(busy_tid, "Busy thread id not received")
self.assertIn(sleeper_tid, statuses, "Sleeper tid not found in
sampled threads")
self.assertIn(busy_tid, statuses, "Busy tid not found in
sampled threads")
- self.assertEqual(statuses[sleeper_tid], 1, "Sleeper thread
should be idle (1)")
- self.assertEqual(statuses[busy_tid], 0, "Busy thread should be
running (0)")
+ self.assertFalse(statuses[sleeper_tid] & THREAD_STATUS_ON_CPU,
"Sleeper thread should be off CPU")
+ self.assertTrue(statuses[busy_tid] & THREAD_STATUS_ON_CPU,
"Busy thread should be on CPU")
finally:
if client_socket is not None:
@@ -1875,11 +1884,14 @@ def busy():
for thread_info in interpreter_info.threads:
statuses[thread_info.thread_id] =
thread_info.status
- # Check if sleeper thread is idle (status 2 for GIL
mode) and busy thread is running
+ # Check if sleeper thread doesn't have GIL and busy
thread has GIL
+ # In the new flags system:
+ # - sleeper should NOT have HAS_GIL flag (waiting for
GIL)
+ # - busy should have HAS_GIL flag
if (sleeper_tid in statuses and
busy_tid in statuses and
- statuses[sleeper_tid] == 2 and
- statuses[busy_tid] == 0):
+ not (statuses[sleeper_tid] &
THREAD_STATUS_HAS_GIL) and
+ (statuses[busy_tid] & THREAD_STATUS_HAS_GIL)):
break
time.sleep(0.5) # Give a bit of time to let threads
settle
except PermissionError:
@@ -1891,8 +1903,8 @@ def busy():
self.assertIsNotNone(busy_tid, "Busy thread id not received")
self.assertIn(sleeper_tid, statuses, "Sleeper tid not found in
sampled threads")
self.assertIn(busy_tid, statuses, "Busy tid not found in
sampled threads")
- self.assertEqual(statuses[sleeper_tid], 2, "Sleeper thread
should be idle (1)")
- self.assertEqual(statuses[busy_tid], 0, "Busy thread should be
running (0)")
+ self.assertFalse(statuses[sleeper_tid] &
THREAD_STATUS_HAS_GIL, "Sleeper thread should not have GIL")
+ self.assertTrue(statuses[busy_tid] & THREAD_STATUS_HAS_GIL,
"Busy thread should have GIL")
finally:
if client_socket is not None:
@@ -1900,6 +1912,128 @@ def busy():
p.terminate()
p.wait(timeout=SHORT_TIMEOUT)
+ @unittest.skipIf(
+ sys.platform not in ("linux", "darwin", "win32"),
+ "Test only runs on supported platforms (Linux, macOS, or Windows)",
+ )
+ @unittest.skipIf(sys.platform == "android", "Android raises Linux-specific
exception")
+ def test_thread_status_all_mode_detection(self):
+ port = find_unused_port()
+ script = textwrap.dedent(
+ f"""\
+ import socket
+ import threading
+ import time
+ import sys
+
+ def sleeper_thread():
+ conn = socket.create_connection(("localhost", {port}))
+ conn.sendall(b"sleeper:" +
str(threading.get_native_id()).encode())
+ while True:
+ time.sleep(1)
+
+ def busy_thread():
+ conn = socket.create_connection(("localhost", {port}))
+ conn.sendall(b"busy:" +
str(threading.get_native_id()).encode())
+ while True:
+ sum(range(100000))
+
+ t1 = threading.Thread(target=sleeper_thread)
+ t2 = threading.Thread(target=busy_thread)
+ t1.start()
+ t2.start()
+ t1.join()
+ t2.join()
+ """
+ )
+
+ with os_helper.temp_dir() as tmp_dir:
+ script_file = make_script(tmp_dir, "script", script)
+ server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+ server_socket.bind(("localhost", port))
+ server_socket.listen(2)
+ server_socket.settimeout(SHORT_TIMEOUT)
+
+ p = subprocess.Popen(
+ [sys.executable, script_file],
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ )
+
+ client_sockets = []
+ try:
+ sleeper_tid = None
+ busy_tid = None
+
+ # Receive thread IDs from the child process
+ for _ in range(2):
+ client_socket, _ = server_socket.accept()
+ client_sockets.append(client_socket)
+ line = client_socket.recv(1024)
+ if line:
+ if line.startswith(b"sleeper:"):
+ try:
+ sleeper_tid = int(line.split(b":")[-1])
+ except Exception:
+ pass
+ elif line.startswith(b"busy:"):
+ try:
+ busy_tid = int(line.split(b":")[-1])
+ except Exception:
+ pass
+
+ server_socket.close()
+
+ attempts = 10
+ statuses = {}
+ try:
+ unwinder = RemoteUnwinder(p.pid, all_threads=True,
mode=PROFILING_MODE_ALL,
+
skip_non_matching_threads=False)
+ for _ in range(attempts):
+ traces = unwinder.get_stack_trace()
+ # Find threads and their statuses
+ statuses = {}
+ for interpreter_info in traces:
+ for thread_info in interpreter_info.threads:
+ statuses[thread_info.thread_id] =
thread_info.status
+
+ # Check ALL mode provides both GIL and CPU info
+ # - sleeper should NOT have ON_CPU and NOT have HAS_GIL
+ # - busy should have ON_CPU and have HAS_GIL
+ if (sleeper_tid in statuses and
+ busy_tid in statuses and
+ not (statuses[sleeper_tid] & THREAD_STATUS_ON_CPU)
and
+ not (statuses[sleeper_tid] &
THREAD_STATUS_HAS_GIL) and
+ (statuses[busy_tid] & THREAD_STATUS_ON_CPU) and
+ (statuses[busy_tid] & THREAD_STATUS_HAS_GIL)):
+ break
+ time.sleep(0.5)
+ except PermissionError:
+ self.skipTest(
+ "Insufficient permissions to read the stack trace"
+ )
+
+ self.assertIsNotNone(sleeper_tid, "Sleeper thread id not
received")
+ self.assertIsNotNone(busy_tid, "Busy thread id not received")
+ self.assertIn(sleeper_tid, statuses, "Sleeper tid not found in
sampled threads")
+ self.assertIn(busy_tid, statuses, "Busy tid not found in
sampled threads")
+
+ # Sleeper thread: off CPU, no GIL
+ self.assertFalse(statuses[sleeper_tid] & THREAD_STATUS_ON_CPU,
"Sleeper should be off CPU")
+ self.assertFalse(statuses[sleeper_tid] &
THREAD_STATUS_HAS_GIL, "Sleeper should not have GIL")
+
+ # Busy thread: on CPU, has GIL
+ self.assertTrue(statuses[busy_tid] & THREAD_STATUS_ON_CPU,
"Busy should be on CPU")
+ self.assertTrue(statuses[busy_tid] & THREAD_STATUS_HAS_GIL,
"Busy should have GIL")
+
+ finally:
+ for client_socket in client_sockets:
+ client_socket.close()
+ p.terminate()
+ p.wait(timeout=SHORT_TIMEOUT)
+ p.stdout.close()
+ p.stderr.close()
if __name__ == "__main__":
diff --git a/Lib/test/test_profiling/test_sampling_profiler.py b/Lib/test/test_profiling/test_sampling_profiler.py
index 0ba6799a1ce5ba..ae9bf3ef2e50e4 100644
--- a/Lib/test/test_profiling/test_sampling_profiler.py
+++ b/Lib/test/test_profiling/test_sampling_profiler.py
@@ -63,12 +63,14 @@ def __repr__(self):
class MockThreadInfo:
"""Mock ThreadInfo for testing since the real one isn't accessible."""
- def __init__(self, thread_id, frame_info):
+ def __init__(self, thread_id, frame_info, status=0, gc_collecting=False):
# Default to THREAD_STATE_RUNNING (0)
self.thread_id = thread_id
self.frame_info = frame_info
+ self.status = status
+ self.gc_collecting = gc_collecting
def __repr__(self):
- return f"MockThreadInfo(thread_id={self.thread_id},
frame_info={self.frame_info})"
+ return f"MockThreadInfo(thread_id={self.thread_id},
frame_info={self.frame_info}, status={self.status},
gc_collecting={self.gc_collecting})"
class MockInterpreterInfo:
@@ -674,6 +676,97 @@ def test_gecko_collector_export(self):
self.assertIn("func2", string_array)
self.assertIn("other_func", string_array)
+ def test_gecko_collector_markers(self):
+ """Test Gecko profile markers for GIL and CPU state tracking."""
+ try:
+ from _remote_debugging import THREAD_STATUS_HAS_GIL,
THREAD_STATUS_ON_CPU, THREAD_STATUS_GIL_REQUESTED
+ except ImportError:
+ THREAD_STATUS_HAS_GIL = (1 << 0)
+ THREAD_STATUS_ON_CPU = (1 << 1)
+ THREAD_STATUS_GIL_REQUESTED = (1 << 3)
+
+ collector = GeckoCollector()
+
+ # Status combinations for different thread states
+ HAS_GIL_ON_CPU = THREAD_STATUS_HAS_GIL | THREAD_STATUS_ON_CPU #
Running Python code
+ NO_GIL_ON_CPU = THREAD_STATUS_ON_CPU # Running native code
+ WAITING_FOR_GIL = THREAD_STATUS_GIL_REQUESTED # Waiting for GIL
+
+ # Simulate thread state transitions
+ collector.collect([
+ MockInterpreterInfo(0, [
+ MockThreadInfo(1, [("test.py", 10, "python_func")],
status=HAS_GIL_ON_CPU)
+ ])
+ ])
+
+ collector.collect([
+ MockInterpreterInfo(0, [
+ MockThreadInfo(1, [("test.py", 15, "wait_func")],
status=WAITING_FOR_GIL)
+ ])
+ ])
+
+ collector.collect([
+ MockInterpreterInfo(0, [
+ MockThreadInfo(1, [("test.py", 20, "python_func2")],
status=HAS_GIL_ON_CPU)
+ ])
+ ])
+
+ collector.collect([
+ MockInterpreterInfo(0, [
+ MockThreadInfo(1, [("native.c", 100, "native_func")],
status=NO_GIL_ON_CPU)
+ ])
+ ])
+
+ profile_data = collector._build_profile()
+
+ # Verify we have threads with markers
+ self.assertIn("threads", profile_data)
+ self.assertEqual(len(profile_data["threads"]), 1)
+ thread_data = profile_data["threads"][0]
+
+ # Check markers exist
+ self.assertIn("markers", thread_data)
+ markers = thread_data["markers"]
+
+ # Should have marker arrays
+ self.assertIn("name", markers)
+ self.assertIn("startTime", markers)
+ self.assertIn("endTime", markers)
+ self.assertIn("category", markers)
+ self.assertGreater(markers["length"], 0, "Should have generated
markers")
+
+ # Get marker names from string table
+ string_array = profile_data["shared"]["stringArray"]
+ marker_names = [string_array[idx] for idx in markers["name"]]
+
+ # Verify we have different marker types
+ marker_name_set = set(marker_names)
+
+ # Should have "Has GIL" markers (when thread had GIL)
+ self.assertIn("Has GIL", marker_name_set, "Should have 'Has GIL'
markers")
+
+ # Should have "No GIL" markers (when thread didn't have GIL)
+ self.assertIn("No GIL", marker_name_set, "Should have 'No GIL'
markers")
+
+ # Should have "On CPU" markers (when thread was on CPU)
+ self.assertIn("On CPU", marker_name_set, "Should have 'On CPU'
markers")
+
+ # Should have "Waiting for GIL" markers (when thread was waiting)
+ self.assertIn("Waiting for GIL", marker_name_set, "Should have
'Waiting for GIL' markers")
+
+ # Verify marker structure
+ for i in range(markers["length"]):
+ # All markers should be interval markers (phase = 1)
+ self.assertEqual(markers["phase"][i], 1, f"Marker {i} should be
interval marker")
+
+ # All markers should have valid time range
+ start_time = markers["startTime"][i]
+ end_time = markers["endTime"][i]
+ self.assertLessEqual(start_time, end_time, f"Marker {i} should
have valid time range")
+
+ # All markers should have valid category
+ self.assertGreaterEqual(markers["category"][i], 0, f"Marker {i}
should have valid category")
+
def test_pstats_collector_export(self):
collector = PstatsCollector(
sample_interval_usec=1000000
@@ -2625,19 +2718,30 @@ def test_mode_validation(self):
def test_frames_filtered_with_skip_idle(self):
"""Test that frames are actually filtered when skip_idle=True."""
+ # Import thread status flags
+ try:
+ from _remote_debugging import THREAD_STATUS_HAS_GIL,
THREAD_STATUS_ON_CPU
+ except ImportError:
+ THREAD_STATUS_HAS_GIL = (1 << 0)
+ THREAD_STATUS_ON_CPU = (1 << 1)
+
# Create mock frames with different thread statuses
class MockThreadInfoWithStatus:
def __init__(self, thread_id, frame_info, status):
self.thread_id = thread_id
self.frame_info = frame_info
self.status = status
+ self.gc_collecting = False
+
+ # Create test data: active thread (HAS_GIL | ON_CPU), idle thread
(neither), and another active thread
+ ACTIVE_STATUS = THREAD_STATUS_HAS_GIL | THREAD_STATUS_ON_CPU # Has
GIL and on CPU
+ IDLE_STATUS = 0 # Neither has GIL nor on CPU
- # Create test data: running thread, idle thread, and another running
thread
test_frames = [
MockInterpreterInfo(0, [
- MockThreadInfoWithStatus(1, [MockFrameInfo("active1.py", 10,
"active_func1")], 0), # RUNNING
- MockThreadInfoWithStatus(2, [MockFrameInfo("idle.py", 20,
"idle_func")], 1), # IDLE
- MockThreadInfoWithStatus(3, [MockFrameInfo("active2.py", 30,
"active_func2")], 0), # RUNNING
+ MockThreadInfoWithStatus(1, [MockFrameInfo("active1.py", 10,
"active_func1")], ACTIVE_STATUS),
+ MockThreadInfoWithStatus(2, [MockFrameInfo("idle.py", 20,
"idle_func")], IDLE_STATUS),
+ MockThreadInfoWithStatus(3, [MockFrameInfo("active2.py", 30,
"active_func2")], ACTIVE_STATUS),
])
]
diff --git a/Modules/_remote_debugging_module.c b/Modules/_remote_debugging_module.c
index c6ced39c70cdb3..d190b3c9fafa76 100644
--- a/Modules/_remote_debugging_module.c
+++ b/Modules/_remote_debugging_module.c
@@ -11,6 +11,7 @@
* HEADERS AND INCLUDES
*
============================================================================ */
+#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <stddef.h>
@@ -81,6 +82,8 @@ typedef enum _WIN32_THREADSTATE {
#define SIZEOF_TYPE_OBJ sizeof(PyTypeObject)
#define SIZEOF_UNICODE_OBJ sizeof(PyUnicodeObject)
#define SIZEOF_LONG_OBJ sizeof(PyLongObject)
+#define SIZEOF_GC_RUNTIME_STATE sizeof(struct _gc_runtime_state)
+#define SIZEOF_INTERPRETER_STATE sizeof(PyInterpreterState)
// Calculate the minimum buffer size needed to read interpreter state fields
// We need to read code_object_generation and potentially tlbc_generation
@@ -178,8 +181,9 @@ static PyStructSequence_Desc CoroInfo_desc = {
// ThreadInfo structseq type - replaces 2-tuple (thread_id, frame_info)
static PyStructSequence_Field ThreadInfo_fields[] = {
{"thread_id", "Thread ID"},
- {"status", "Thread status"},
+ {"status", "Thread status (flags: HAS_GIL, ON_CPU, UNKNOWN or legacy enum)"},
{"frame_info", "Frame information"},
+ {"gc_collecting", "Whether GC is collecting (interpreter-level)"},
{NULL}
};
@@ -187,7 +191,7 @@ static PyStructSequence_Desc ThreadInfo_desc = {
"_remote_debugging.ThreadInfo",
"Information about a thread",
ThreadInfo_fields,
- 2
+ 3
};
// InterpreterInfo structseq type - replaces 2-tuple (interpreter_id, thread_list)
@@ -247,9 +251,16 @@ enum _ThreadState {
enum _ProfilingMode {
PROFILING_MODE_WALL = 0,
PROFILING_MODE_CPU = 1,
- PROFILING_MODE_GIL = 2
+ PROFILING_MODE_GIL = 2,
+ PROFILING_MODE_ALL = 3 // Combines GIL + CPU checks
};
+// Thread status flags (can be combined)
+#define THREAD_STATUS_HAS_GIL (1 << 0) // Thread has the GIL
+#define THREAD_STATUS_ON_CPU (1 << 1) // Thread is running on CPU
+#define THREAD_STATUS_UNKNOWN (1 << 2) // Status could not be determined
+#define THREAD_STATUS_GIL_REQUESTED (1 << 3) // Thread is waiting for the GIL
+
typedef struct {
PyObject_HEAD
proc_handle_t handle;
@@ -2650,34 +2661,70 @@ unwind_stack_for_thread(
long tid = GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.native_thread_id);
- // Calculate thread status based on mode
- int status = THREAD_STATE_UNKNOWN;
- if (unwinder->mode == PROFILING_MODE_CPU) {
- long pthread_id = GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.thread_id);
- status = get_thread_status(unwinder, tid, pthread_id);
- if (status == -1) {
- PyErr_Print();
- PyErr_SetString(PyExc_RuntimeError, "Failed to get thread status");
- goto error;
- }
- } else if (unwinder->mode == PROFILING_MODE_GIL) {
+ // Read GC collecting state from the interpreter (before any skip checks)
+ uintptr_t interp_addr = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.interp);
+
+ // Read the GC runtime state from the interpreter state
+ uintptr_t gc_addr = interp_addr + unwinder->debug_offsets.interpreter_state.gc;
+ char gc_state[SIZEOF_GC_RUNTIME_STATE];
+ if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, gc_addr, unwinder->debug_offsets.gc.size, gc_state) < 0) {
+ set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read GC state");
+ goto error;
+ }
+
+ int gc_collecting = GET_MEMBER(int, gc_state, unwinder->debug_offsets.gc.collecting);
+
+ // Calculate thread status using flags (always)
+ int status_flags = 0;
+
+ // Check GIL status
+ int has_gil = 0;
+ int gil_requested = 0;
#ifdef Py_GIL_DISABLED
- // All threads are considered running in free threading builds if they have a thread state attached
- int active = GET_MEMBER(_thread_status, ts, unwinder->debug_offsets.thread_state.status).active;
- status = active ? THREAD_STATE_RUNNING : THREAD_STATE_GIL_WAIT;
+ int active = GET_MEMBER(_thread_status, ts, unwinder->debug_offsets.thread_state.status).active;
+ has_gil = active;
#else
- status = (*current_tstate == gil_holder_tstate) ? THREAD_STATE_RUNNING : THREAD_STATE_GIL_WAIT;
+ // Read holds_gil directly from thread state
+ has_gil = GET_MEMBER(int, ts, unwinder->debug_offsets.thread_state.holds_gil);
+
+ // Check if thread is actively requesting the GIL
+ if (unwinder->debug_offsets.thread_state.gil_requested != 0) {
+ gil_requested = GET_MEMBER(int, ts, unwinder->debug_offsets.thread_state.gil_requested);
+ }
+
+ // Set GIL_REQUESTED flag if thread is waiting
+ if (!has_gil && gil_requested) {
+ status_flags |= THREAD_STATUS_GIL_REQUESTED;
+ }
#endif
- } else {
- // PROFILING_MODE_WALL - all threads are considered running
- status = THREAD_STATE_RUNNING;
+ if (has_gil) {
+ status_flags |= THREAD_STATUS_HAS_GIL;
+ }
+
+ // Assert that we never have both HAS_GIL and GIL_REQUESTED set at the same time
+ // This would indicate a race condition in the GIL state tracking
+ assert(!(has_gil && gil_requested));
+
+ // Check CPU status
+ long pthread_id = GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.thread_id);
+ int cpu_status = get_thread_status(unwinder, tid, pthread_id);
+ if (cpu_status == -1) {
+ status_flags |= THREAD_STATUS_UNKNOWN;
+ } else if (cpu_status == THREAD_STATE_RUNNING) {
+ status_flags |= THREAD_STATUS_ON_CPU;
}
// Check if we should skip this thread based on mode
int should_skip = 0;
- if (unwinder->skip_non_matching_threads && status != THREAD_STATE_RUNNING &&
- (unwinder->mode == PROFILING_MODE_CPU || unwinder->mode == PROFILING_MODE_GIL)) {
- should_skip = 1;
+ if (unwinder->skip_non_matching_threads) {
+ if (unwinder->mode == PROFILING_MODE_CPU) {
+ // Skip if not on CPU
+ should_skip = !(status_flags & THREAD_STATUS_ON_CPU);
+ } else if (unwinder->mode == PROFILING_MODE_GIL) {
+ // Skip if doesn't have GIL
+ should_skip = !(status_flags & THREAD_STATUS_HAS_GIL);
+ }
+ // PROFILING_MODE_WALL and PROFILING_MODE_ALL never skip
}
if (should_skip) {
@@ -2719,16 +2766,25 @@ unwind_stack_for_thread(
goto error;
}
- PyObject *py_status = PyLong_FromLong(status);
+ // Always use status_flags
+ PyObject *py_status = PyLong_FromLong(status_flags);
if (py_status == NULL) {
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create thread status");
goto error;
}
- PyErr_Print();
+ PyObject *py_gc_collecting = PyBool_FromLong(gc_collecting);
+ if (py_gc_collecting == NULL) {
+ set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create gc_collecting");
+ Py_DECREF(py_status);
+ goto error;
+ }
+
+ // py_status contains status flags (bitfield)
PyStructSequence_SetItem(result, 0, thread_id);
PyStructSequence_SetItem(result, 1, py_status); // Steals reference
PyStructSequence_SetItem(result, 2, frame_info); // Steals reference
+ PyStructSequence_SetItem(result, 3, py_gc_collecting); // Steals reference
cleanup_stack_chunks(&chunks);
return result;
@@ -3401,6 +3457,21 @@ _remote_debugging_exec(PyObject *m)
if (rc < 0) {
return -1;
}
+
+ // Add thread status flag constants
+ if (PyModule_AddIntConstant(m, "THREAD_STATUS_HAS_GIL", THREAD_STATUS_HAS_GIL) < 0) {
+ return -1;
+ }
+ if (PyModule_AddIntConstant(m, "THREAD_STATUS_ON_CPU", THREAD_STATUS_ON_CPU) < 0) {
+ return -1;
+ }
+ if (PyModule_AddIntConstant(m, "THREAD_STATUS_UNKNOWN", THREAD_STATUS_UNKNOWN) < 0) {
+ return -1;
+ }
+ if (PyModule_AddIntConstant(m, "THREAD_STATUS_GIL_REQUESTED", THREAD_STATUS_GIL_REQUESTED) < 0) {
+ return -1;
+ }
+
if (RemoteDebugging_InitState(st) < 0) {
return -1;
}
diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c
index 9b6506ac3326b3..f6ada3892f801d 100644
--- a/Python/ceval_gil.c
+++ b/Python/ceval_gil.c
@@ -207,6 +207,7 @@ drop_gil_impl(PyThreadState *tstate, struct _gil_runtime_state *gil)
_Py_atomic_store_int_relaxed(&gil->locked, 0);
if (tstate != NULL) {
tstate->holds_gil = 0;
+ tstate->gil_requested = 0;
}
COND_SIGNAL(gil->cond);
MUTEX_UNLOCK(gil->mutex);
@@ -320,6 +321,8 @@ take_gil(PyThreadState *tstate)
MUTEX_LOCK(gil->mutex);
+ tstate->gil_requested = 1;
+
int drop_requested = 0;
while (_Py_atomic_load_int_relaxed(&gil->locked)) {
unsigned long saved_switchnum = gil->switch_number;
@@ -407,6 +410,7 @@ take_gil(PyThreadState *tstate)
}
assert(_PyThreadState_CheckConsistency(tstate));
+ tstate->gil_requested = 0;
tstate->holds_gil = 1;
_Py_unset_eval_breaker_bit(tstate, _PY_GIL_DROP_REQUEST_BIT);
update_eval_breaker_for_thread(interp, tstate);
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]